数字を分類するニューラルネットワークの実装をやってみる２-３

ニューラルネットワークと深層学習の６章にあるnetwork3.pyをubuntu 16.04 + Python3.xでやってみます。

畳み込みニューラルネットワークの実装です。

ソース　GitHub

Python 2.x -> Python 3.xへの主なコード修正はこのページ参照

network3.pyの修正コードは以下参照

network3.pyでは機械学習用ライブラリとしてTheanoを使っています。

Theanoのインストール

Kerasをubuntu 16.04 LTS へインストールのページにTheanoのインストも書いています。

Theanoはもう開発中止になっているので、別途Keras + TensorFlowでもやってみる予定。

ただ現状、TheanoもMNISTもこれ以上やる意味はないので、思案中。

感覚というか勘というか….を養う意味でいろいろいじってみるのは重要か？

Jetson Nano（ubuntu 18.04 LTS）でも実行できます。

network3.pyで100のニューロンを含む隠れ層を1つだけを持つ浅いネットワークを
エポック数60、学習率0.1、ミニバッチサイズ10、正規化なしの条件で実行してみます。

実行用ファイル

run3.py

import network3

from network3 import Network

from network3 import ConvPoolLayer, FullyConnectedLayer, SoftmaxLayer

training_data, validation_data, test_data = network3.load_data_shared()

mini_batch_size = 10

net = Network([FullyConnectedLayer(n_in=784, n_out=100),SoftmaxLayer(n_in=100, n_out=10)], mini_batch_size)

net.SGD(training_data, 60, mini_batch_size, 0.1,validation_data, test_data)

import network3

from network3 import Network

from network3 import ConvPoolLayer, FullyConnectedLayer, SoftmaxLayer

training_data, validation_data, test_data = network3.load_data_shared()

mini_batch_size = 10

net = Network([FullyConnectedLayer(n_in=784, n_out=100),SoftmaxLayer(n_in=100, n_out=10)], mini_batch_size)

net.SGD(training_data, 60, mini_batch_size, 0.1,validation_data, test_data)

＄ python3 run3.py

テキストとほぼ同じ精度です(が、ここでもnetworkやnetwor2と同様に重みとバイアスはランダムに初期化されているので、たまたまだと思います)。

network3.py

Jetson NanoのようにGPUが使える環境ではGPU = Trueですが、その際Print文はコメントアウトしておきましょう。

import pickle as cPickle

import gzip

import numpy as np
import theano
import theano.tensor as T
from theano.tensor.nnet import conv
from theano.tensor.nnet import softmax
from theano.tensor import shared_randomstreams

#from theano.tensor.signal import downsample
from theano.tensor.signal.pool import pool_2d


def linear(z): return z
def ReLU(z): return T.maximum(0.0, z)
from theano.tensor.nnet import sigmoid
from theano.tensor import tanh


GPU = True

if GPU:
    #print ("Trying to run under a GPU.  If this is not desired, then modify "+\
    #    "network3.py\nto set the GPU flag to False.")
    try: theano.config.device = 'gpu'
    except: pass
    theano.config.floatX = 'float32'
else:
    print ("Running with a CPU.  If this is not desired, then the modify "+\
        "network3.py to set\nthe GPU flag to True.")


def load_data_shared(filename="../data/mnist.pkl.gz"):
    f = gzip.open(filename, 'rb')
    training_data, validation_data, test_data = cPickle.load(f,encoding='latin1')
    f.close()
    def shared(data):

        shared_x = theano.shared(
            np.asarray(data[0], dtype=theano.config.floatX), borrow=True)
        shared_y = theano.shared(
            np.asarray(data[1], dtype=theano.config.floatX), borrow=True)
        return shared_x, T.cast(shared_y, "int32")
    return [shared(training_data), shared(validation_data), shared(test_data)]


class Network(object):

    def __init__(self, layers, mini_batch_size):

        self.layers = layers
        self.mini_batch_size = mini_batch_size
        self.params = [param for layer in self.layers for param in layer.params]
        self.x = T.matrix("x")
        self.y = T.ivector("y")
        init_layer = self.layers[0]
        init_layer.set_inpt(self.x, self.x, self.mini_batch_size)
        for j in range(1, len(self.layers)):
            prev_layer, layer  = self.layers[j-1], self.layers[j]
            layer.set_inpt(
                prev_layer.output, prev_layer.output_dropout, self.mini_batch_size)
        self.output = self.layers[-1].output
        self.output_dropout = self.layers[-1].output_dropout

    def SGD(self, training_data, epochs, mini_batch_size, eta,
            validation_data, test_data, lmbda=0.0):

        training_x, training_y = training_data
        validation_x, validation_y = validation_data
        test_x, test_y = test_data

        num_training_batches = size(training_data)/mini_batch_size
        num_validation_batches = size(validation_data)/mini_batch_size
        num_test_batches = size(test_data)/mini_batch_size

        l2_norm_squared = sum([(layer.w**2).sum() for layer in self.layers])
        cost = self.layers[-1].cost(self)+\
               0.5*lmbda*l2_norm_squared/num_training_batches
        grads = T.grad(cost, self.params)
        updates = [(param, param-eta*grad)
                   for param, grad in zip(self.params, grads)]

        i = T.lscalar()
        train_mb = theano.function(
            [i], cost, updates=updates,
            givens={
                self.x:
                training_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size],
                self.y:
                training_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
            })
        validate_mb_accuracy = theano.function(
            [i], self.layers[-1].accuracy(self.y),
            givens={
                self.x:
                validation_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size],
                self.y:
                validation_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
            })
        test_mb_accuracy = theano.function(
            [i], self.layers[-1].accuracy(self.y),
            givens={
                self.x:
                test_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size],
                self.y:
                test_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
            })
        self.test_mb_predictions = theano.function(
            [i], self.layers[-1].y_out,
            givens={
                self.x:
                test_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
            })

        best_validation_accuracy = 0.0
        for epoch in range(epochs):
            #for minibatch_index in range(num_training_batches):
            for minibatch_index in range(int(num_training_batches)):
                iteration = num_training_batches*epoch+minibatch_index
                if iteration % 1000 == 0:
                    print("Training mini-batch number {0}".format(iteration))
                cost_ij = train_mb(minibatch_index)
                if (iteration+1) % num_training_batches == 0:
                    #validation_accuracy = np.mean([validate_mb_accuracy(j) for j in range(num_validation_batches)])
                    validation_accuracy = np.mean([validate_mb_accuracy(j) for j in range(ini(num_validation_batches))])
                    
                    print("Epoch {0}: validation accuracy {1:.2%}".format(
                        epoch, validation_accuracy))
                    if validation_accuracy &gt;= best_validation_accuracy:
                        print("This is the best validation accuracy to date.")
                        best_validation_accuracy = validation_accuracy
                        best_iteration = iteration
                        if test_data:
                            #test_accuracy = np.mean([test_mb_accuracy(j) for j in range(num_test_batches)])
                            test_accuracy = np.mean([test_mb_accuracy(j) for j in range(ini(num_test_batches))])
                            
                            print('The corresponding test accuracy is {0:.2%}'.format(
                                test_accuracy))
        print("Finished training network.")
        print("Best validation accuracy of {0:.2%} obtained at iteration {1}".format(
            best_validation_accuracy, best_iteration))
        print("Corresponding test accuracy of {0:.2%}".format(test_accuracy))


class ConvPoolLayer(object):

    def __init__(self, filter_shape, image_shape, poolsize=(2, 2),
                 activation_fn=sigmoid):

        self.filter_shape = filter_shape
        self.image_shape = image_shape
        self.poolsize = poolsize
        self.activation_fn=activation_fn
        
        n_out = (filter_shape[0]*np.prod(filter_shape[2:])/np.prod(poolsize))
        self.w = theano.shared(
            np.asarray(
                np.random.normal(loc=0, scale=np.sqrt(1.0/n_out), size=filter_shape),
                dtype=theano.config.floatX),
            borrow=True)
        self.b = theano.shared(
            np.asarray(
                np.random.normal(loc=0, scale=1.0, size=(filter_shape[0],)),
                dtype=theano.config.floatX),
            borrow=True)
        self.params = [self.w, self.b]

    def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
        self.inpt = inpt.reshape(self.image_shape)
        conv_out = conv.conv2d(
            input=self.inpt, filters=self.w, filter_shape=self.filter_shape,
            image_shape=self.image_shape)
        #pooled_out = downsample.max_pool_2d(
        #    input=conv_out, ds=self.poolsize, ignore_border=True)
	pooled_out = pool_2d(
            input=conv_out, ws=self.poolsize, ignore_border=True)
	
        self.output = self.activation_fn(
            pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))
        self.output_dropout = self.output

class FullyConnectedLayer(object):

    def __init__(self, n_in, n_out, activation_fn=sigmoid, p_dropout=0.0):
        self.n_in = n_in
        self.n_out = n_out
        self.activation_fn = activation_fn
        self.p_dropout = p_dropout
        
        self.w = theano.shared(
            np.asarray(
                np.random.normal(
                    loc=0.0, scale=np.sqrt(1.0/n_out), size=(n_in, n_out)),
                dtype=theano.config.floatX),
            name='w', borrow=True)
        self.b = theano.shared(
            np.asarray(np.random.normal(loc=0.0, scale=1.0, size=(n_out,)),
                       dtype=theano.config.floatX),
            name='b', borrow=True)
        self.params = [self.w, self.b]

    def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
        self.inpt = inpt.reshape((mini_batch_size, self.n_in))
        self.output = self.activation_fn(
            (1-self.p_dropout)*T.dot(self.inpt, self.w) + self.b)
        self.y_out = T.argmax(self.output, axis=1)
        self.inpt_dropout = dropout_layer(
            inpt_dropout.reshape((mini_batch_size, self.n_in)), self.p_dropout)
        self.output_dropout = self.activation_fn(
            T.dot(self.inpt_dropout, self.w) + self.b)

    def accuracy(self, y):
        "Return the accuracy for the mini-batch."
        return T.mean(T.eq(y, self.y_out))

class SoftmaxLayer(object):

    def __init__(self, n_in, n_out, p_dropout=0.0):
        self.n_in = n_in
        self.n_out = n_out
        self.p_dropout = p_dropout

        self.w = theano.shared(
            np.zeros((n_in, n_out), dtype=theano.config.floatX),
            name='w', borrow=True)
        self.b = theano.shared(
            np.zeros((n_out,), dtype=theano.config.floatX),
            name='b', borrow=True)
        self.params = [self.w, self.b]

    def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
        self.inpt = inpt.reshape((mini_batch_size, self.n_in))
        self.output = softmax((1-self.p_dropout)*T.dot(self.inpt, self.w) + self.b)
        self.y_out = T.argmax(self.output, axis=1)
        self.inpt_dropout = dropout_layer(
            inpt_dropout.reshape((mini_batch_size, self.n_in)), self.p_dropout)
        self.output_dropout = softmax(T.dot(self.inpt_dropout, self.w) + self.b)

    def cost(self, net):
        "Return the log-likelihood cost."
        return -T.mean(T.log(self.output_dropout)[T.arange(net.y.shape[0]), net.y])

    def accuracy(self, y):
        "Return the accuracy for the mini-batch."
        return T.mean(T.eq(y, self.y_out))


def size(data):
    "Return the size of the dataset `data`."
    return data[0].get_value(borrow=True).shape[0]

def dropout_layer(layer, p_dropout):
    srng = shared_randomstreams.RandomStreams(
        np.random.RandomState(0).randint(999999))
    mask = srng.binomial(n=1, p=1-p_dropout, size=layer.shape)
    return layer*T.cast(mask, theano.config.floatX)

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

import pickle as cPickle

import gzip

import numpy as np

import theano

import theano.tensor as T

from theano.tensor.nnet import conv

from theano.tensor.nnet import softmax

from theano.tensor import shared_randomstreams

#from theano.tensor.signal import downsample

from theano.tensor.signal.pool import pool_2d

def linear(z): return z

def ReLU(z): return T.maximum(0.0, z)

from theano.tensor.nnet import sigmoid

from theano.tensor import tanh

GPU = True

if GPU:

#print ("Trying to run under a GPU. If this is not desired, then modify "+\

# "network3.py\nto set the GPU flag to False.")

try: theano.config.device = 'gpu'

except: pass

theano.config.floatX = 'float32'

else:

print ("Running with a CPU. If this is not desired, then the modify "+\

"network3.py to set\nthe GPU flag to True.")

def load_data_shared(filename="../data/mnist.pkl.gz"):

f = gzip.open(filename, 'rb')

training_data, validation_data, test_data = cPickle.load(f,encoding='latin1')

f.close()

def shared(data):

shared_x = theano.shared(

np.asarray(data[0], dtype=theano.config.floatX), borrow=True)

shared_y = theano.shared(

np.asarray(data[1], dtype=theano.config.floatX), borrow=True)

return shared_x, T.cast(shared_y, "int32")

return [shared(training_data), shared(validation_data), shared(test_data)]

class Network(object):

def __init__(self, layers, mini_batch_size):

self.layers = layers

self.mini_batch_size = mini_batch_size

self.params = [param for layer in self.layers for param in layer.params]

self.x = T.matrix("x")

self.y = T.ivector("y")

init_layer = self.layers[0]

init_layer.set_inpt(self.x, self.x, self.mini_batch_size)

for j in range(1, len(self.layers)):

prev_layer, layer = self.layers[j-1], self.layers[j]

layer.set_inpt(

prev_layer.output, prev_layer.output_dropout, self.mini_batch_size)

self.output = self.layers[-1].output

self.output_dropout = self.layers[-1].output_dropout

def SGD(self, training_data, epochs, mini_batch_size, eta,

validation_data, test_data, lmbda=0.0):

training_x, training_y = training_data

validation_x, validation_y = validation_data

test_x, test_y = test_data

num_training_batches = size(training_data)/mini_batch_size

num_validation_batches = size(validation_data)/mini_batch_size

num_test_batches = size(test_data)/mini_batch_size

l2_norm_squared = sum([(layer.w**2).sum() for layer in self.layers])

cost = self.layers[-1].cost(self)+\

0.5*lmbda*l2_norm_squared/num_training_batches

grads = T.grad(cost, self.params)

updates = [(param, param-eta*grad)

for param, grad in zip(self.params, grads)]

i = T.lscalar()

train_mb = theano.function(

[i], cost, updates=updates,

givens={

self.x:

training_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size],

self.y:

training_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]

})

validate_mb_accuracy = theano.function(

[i], self.layers[-1].accuracy(self.y),

givens={

self.x:

validation_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size],

self.y:

validation_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]

})

test_mb_accuracy = theano.function(

[i], self.layers[-1].accuracy(self.y),

givens={

self.x:

test_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size],

self.y:

test_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]

})

self.test_mb_predictions = theano.function(

[i], self.layers[-1].y_out,

givens={

self.x:

test_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size]

})

best_validation_accuracy = 0.0

for epoch in range(epochs):

#for minibatch_index in range(num_training_batches):

for minibatch_index in range(int(num_training_batches)):

iteration = num_training_batches*epoch+minibatch_index

if iteration % 1000 == 0:

print("Training mini-batch number {0}".format(iteration))

cost_ij = train_mb(minibatch_index)

if (iteration+1) % num_training_batches == 0:

#validation_accuracy = np.mean([validate_mb_accuracy(j) for j in range(num_validation_batches)])

validation_accuracy = np.mean([validate_mb_accuracy(j) for j in range(ini(num_validation_batches))])

print("Epoch {0}: validation accuracy {1:.2%}".format(

epoch, validation_accuracy))

if validation_accuracy >= best_validation_accuracy:

print("This is the best validation accuracy to date.")

best_validation_accuracy = validation_accuracy

best_iteration = iteration

if test_data:

#test_accuracy = np.mean([test_mb_accuracy(j) for j in range(num_test_batches)])

test_accuracy = np.mean([test_mb_accuracy(j) for j in range(ini(num_test_batches))])

print('The corresponding test accuracy is {0:.2%}'.format(

test_accuracy))

print("Finished training network.")

print("Best validation accuracy of {0:.2%} obtained at iteration {1}".format(

best_validation_accuracy, best_iteration))

print("Corresponding test accuracy of {0:.2%}".format(test_accuracy))

class ConvPoolLayer(object):

def __init__(self, filter_shape, image_shape, poolsize=(2, 2),

activation_fn=sigmoid):

self.filter_shape = filter_shape

self.image_shape = image_shape

self.poolsize = poolsize

self.activation_fn=activation_fn

n_out = (filter_shape[0]*np.prod(filter_shape[2:])/np.prod(poolsize))

self.w = theano.shared(

np.asarray(

np.random.normal(loc=0, scale=np.sqrt(1.0/n_out), size=filter_shape),

dtype=theano.config.floatX),

borrow=True)

self.b = theano.shared(

np.asarray(

np.random.normal(loc=0, scale=1.0, size=(filter_shape[0],)),

dtype=theano.config.floatX),

borrow=True)

self.params = [self.w, self.b]

def set_inpt(self, inpt, inpt_dropout, mini_batch_size):

self.inpt = inpt.reshape(self.image_shape)

conv_out = conv.conv2d(

input=self.inpt, filters=self.w, filter_shape=self.filter_shape,

image_shape=self.image_shape)

#pooled_out = downsample.max_pool_2d(

# input=conv_out, ds=self.poolsize, ignore_border=True)

pooled_out = pool_2d(

input=conv_out, ws=self.poolsize, ignore_border=True)

self.output = self.activation_fn(

pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))

self.output_dropout = self.output

class FullyConnectedLayer(object):

def __init__(self, n_in, n_out, activation_fn=sigmoid, p_dropout=0.0):

self.n_in = n_in

self.n_out = n_out

self.activation_fn = activation_fn

self.p_dropout = p_dropout

self.w = theano.shared(

np.asarray(

np.random.normal(

loc=0.0, scale=np.sqrt(1.0/n_out), size=(n_in, n_out)),

dtype=theano.config.floatX),

name='w', borrow=True)

self.b = theano.shared(

np.asarray(np.random.normal(loc=0.0, scale=1.0, size=(n_out,)),

dtype=theano.config.floatX),

name='b', borrow=True)

self.params = [self.w, self.b]

def set_inpt(self, inpt, inpt_dropout, mini_batch_size):

self.inpt = inpt.reshape((mini_batch_size, self.n_in))

self.output = self.activation_fn(

(1-self.p_dropout)*T.dot(self.inpt, self.w) + self.b)

self.y_out = T.argmax(self.output, axis=1)

self.inpt_dropout = dropout_layer(

inpt_dropout.reshape((mini_batch_size, self.n_in)), self.p_dropout)

self.output_dropout = self.activation_fn(

T.dot(self.inpt_dropout, self.w) + self.b)

def accuracy(self, y):

"Return the accuracy for the mini-batch."

return T.mean(T.eq(y, self.y_out))

class SoftmaxLayer(object):

def __init__(self, n_in, n_out, p_dropout=0.0):

self.n_in = n_in

self.n_out = n_out

self.p_dropout = p_dropout

self.w = theano.shared(

np.zeros((n_in, n_out), dtype=theano.config.floatX),

name='w', borrow=True)

self.b = theano.shared(

np.zeros((n_out,), dtype=theano.config.floatX),

name='b', borrow=True)

self.params = [self.w, self.b]

def set_inpt(self, inpt, inpt_dropout, mini_batch_size):

self.inpt = inpt.reshape((mini_batch_size, self.n_in))

self.output = softmax((1-self.p_dropout)*T.dot(self.inpt, self.w) + self.b)

self.y_out = T.argmax(self.output, axis=1)

self.inpt_dropout = dropout_layer(

inpt_dropout.reshape((mini_batch_size, self.n_in)), self.p_dropout)

self.output_dropout = softmax(T.dot(self.inpt_dropout, self.w) + self.b)

def cost(self, net):

"Return the log-likelihood cost."

return -T.mean(T.log(self.output_dropout)[T.arange(net.y.shape[0]), net.y])

def accuracy(self, y):

"Return the accuracy for the mini-batch."

return T.mean(T.eq(y, self.y_out))

def size(data):

"Return the size of the dataset `data`."

return data[0].get_value(borrow=True).shape[0]

def dropout_layer(layer, p_dropout):

srng = shared_randomstreams.RandomStreams(

np.random.RandomState(0).randint(999999))

mask = srng.binomial(n=1, p=1-p_dropout, size=layer.shape)

return layer*T.cast(mask, theano.config.floatX)

ソース（Python3.x)

工事中

テキストの課題を解かねば……

テキストの６章ではnetwork3.pyでいろいろな実験的試みを行っています。

GPUを使う場面もありそうです、さくらインターネットの高火力サーバーも使ってみる予定です。

network3.pyを使っていろいろなニューラルネットワークを試してみる

第六章で試しているコードです、ご参考までに。

データ拡張用のexpand_mnist.pyは実行すると本当に数分かかります。

全部をJetson Nanoで実行するのはチトつらい。GoogleのColaboratoryがいいかもしれない、現状Theanoも使えます。2019/04/04時点ではTheano==1.0.4のようです（最終バージョン）。

やってみたいアイデアがあったらコードを書いて実装してみて動くようだったらColabにあげる….という感じ？

Google Colaboratoryを使ってみます

(Trial-1)
100のニューロンを含む隠れ層1、訓練のエポック数は 60 、学習率は η=0.1、ミニバッチサイズは 10、正規化なしの条件で実行

>>>import network3
>>>from network3 import Network
>>>from network3 import ConvPoolLayer, FullyConnectedLayer, SoftmaxLayer
>>>training_data, validation_data, test_data = network3.load_data_shared()
>>>mini_batch_size = 10
>>>net = Network([FullyConnectedLayer(n_in=784, n_out=100),SoftmaxLayer(n_in=100, n_out=10)], mini_batch_size)
>>>net.SGD(training_data, 60, mini_batch_size, 0.1,validation_data, test_data)

JetsonでJupyter-notebook

(Trial-2)
ネットワークの最初の層へ、畳み込み-プーリング層を挿入

>>>import network3
>>>from network3 import Network
>>>from network3 import ConvPoolLayer, FullyConnectedLayer, SoftmaxLayer
>>>training_data, validation_data, test_data = network3.load_data_shared()
>>>mini_batch_size = 10
>>>net = Network([
ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28),
filter_shape=(20, 1, 5, 5),
poolsize=(2, 2)),
FullyConnectedLayer(n_in=20*12*12, n_out=100),
SoftmaxLayer(n_in=100, n_out=10)], mini_batch_size)

>>>net.SGD(training_data, 60, mini_batch_size, 0.1,
validation_data, test_data)

(Trial-3)
2つ目の畳み込み-プーリング層を挿入

>>>import network3
>>>from network3 import Network
>>>from network3 import ConvPoolLayer, FullyConnectedLayer, SoftmaxLayer
>>>training_data, validation_data, test_data = network3.load_data_shared()
>>>mini_batch_size = 10
>>>net = Network([
ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28),
filter_shape=(20, 1, 5, 5),
poolsize=(2, 2)),
ConvPoolLayer(image_shape=(mini_batch_size, 20, 12, 12),
filter_shape=(40, 20, 5, 5),
poolsize=(2, 2)),
FullyConnectedLayer(n_in=40*4*4, n_out=100),
SoftmaxLayer(n_in=100, n_out=10)], mini_batch_size)

>>>net.SGD(training_data, 60, mini_batch_size, 0.1,
validation_data, test_data)

(Trial-4)
活性化関数をシグモイドではなく、ReLUに変更、訓練のエポック数 60 、学習率 η=0.03 で訓練、パラメータ λ=0.1 としてL2 正規化を使用

>>>import network3
>>>from network3 import ReLU
>>>from network3 import Network
>>>from network3 import ConvPoolLayer, FullyConnectedLayer, SoftmaxLayer
>>>training_data, validation_data, test_data = network3.load_data_shared()
>>>mini_batch_size = 10
>>>net = Network([
ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28),
filter_shape=(20, 1, 5, 5),
poolsize=(2, 2),
activation_fn=ReLU),
ConvPoolLayer(image_shape=(mini_batch_size, 20, 12, 12),
filter_shape=(40, 20, 5, 5),
poolsize=(2, 2),
activation_fn=ReLU),
FullyConnectedLayer(n_in=40*4*4, n_out=100, activation_fn=ReLU),
SoftmaxLayer(n_in=100, n_out=10)], mini_batch_size)

>>>net.SGD(training_data, 60, mini_batch_size, 0.03,
validation_data, test_data, lmbda=0.1)

(Trial-5)
結果を改良する別の手法としての訓練データの拡張
訓練データの各画像を1ピクセルずつ上下左右のいずれかの方向にずらして、入力の 50,000 のMNISTの訓練画像を、 250,000 に増やすことによって過適合を防ぐ効果が期待できる

$ python3 expand_mnist.py

>>>import network3
>>>from network3 import ReLU
>>>from network3 import Network
>>>from network3 import ConvPoolLayer, FullyConnectedLayer, SoftmaxLayer
>>>mini_batch_size = 10
>>>training_data, validation_data, test_data = network3.load_data_shared()
>>>expanded_training_data, _, _ = network3.load_data_shared(
“../data/mnist_expanded.pkl.gz”)

>>>net.SGD(expanded_training_data, 60, mini_batch_size, 0.03,
validation_data, test_data, lmbda=0.1)

(Trial-6)
サイズの大きな全結合層を追加してみる

>>>import network3
>>>from network3 import ReLU
>>>from network3 import Network
>>>from network3 import ConvPoolLayer, FullyConnectedLayer, SoftmaxLayer
>>>mini_batch_size = 10
>>>training_data, validation_data, test_data = network3.load_data_shared()
>>>expanded_training_data, _, _ = network3.load_data_shared(
“../data/mnist_expanded.pkl.gz”)

>>>net = Network([
ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28),
filter_shape=(20, 1, 5, 5),
poolsize=(2, 2),
activation_fn=ReLU),
ConvPoolLayer(image_shape=(mini_batch_size, 20, 12, 12),
filter_shape=(40, 20, 5, 5),
poolsize=(2, 2),
activation_fn=ReLU),
FullyConnectedLayer(n_in=40*4*4, n_out=100, activation_fn=ReLU),
FullyConnectedLayer(n_in=100, n_out=100, activation_fn=ReLU),
SoftmaxLayer(n_in=100, n_out=10)], mini_batch_size)
>>>net.SGD(expanded_training_data, 60, mini_batch_size, 0.03,
validation_data, test_data, lmbda=0.1)

(Trial-7)
ドロップアウトを全結合層に適用してみる、訓練のエポック数 40、全結合層内のニューロン数 1,000
訓練するエポック数を 40 に減らすことで、ドロップアウトが過適合を抑制するため、高速に学習できる
ドロップアウトはニューロンの多くを効率的に省いているので、全結合層内のニューロン数の増量が必要

>>>import network3
>>>from network3 import ReLU
>>>from network3 import Network
>>>from network3 import ConvPoolLayer, FullyConnectedLayer, SoftmaxLayer
>>>mini_batch_size = 10
>>>training_data, validation_data, test_data = network3.load_data_shared()
>>>expanded_training_data, _, _ = network3.load_data_shared(
“../data/mnist_expanded.pkl.gz”)

>>>net = Network([
ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28),
filter_shape=(20, 1, 5, 5),
poolsize=(2, 2),
activation_fn=ReLU),
ConvPoolLayer(image_shape=(mini_batch_size, 20, 12, 12),
filter_shape=(40, 20, 5, 5),
poolsize=(2, 2),
activation_fn=ReLU),
FullyConnectedLayer(
n_in=40*4*4, n_out=1000, activation_fn=ReLU, p_dropout=0.5),
FullyConnectedLayer(
n_in=1000, n_out=1000, activation_fn=ReLU, p_dropout=0.5),
SoftmaxLayer(n_in=1000, n_out=10, p_dropout=0.5)],
mini_batch_size)

>>>net.SGD(expanded_training_data, 40, mini_batch_size, 0.03,
validation_data, test_data)

FRONT

地図と画像のサイト

数字を分類するニューラルネットワークの実装をやってみる２-３

Be the first to comment

Leave a Reply コメントをキャンセル