昨日は強化学習用のニューラルネットワークの計算を行列で表現した。
今日はそれを使って実際に実装していく。
なお、Swiftでの行列演算については、以下を参照:
ここで定義したMatrixクラス、Vectorクラスを使っていくことになる。
また、Rubyでの実装は以下を参照:
(行列計算を使ってないので、読むのがかなり大変だけど・・・)
ニューラルネットワークの仕様は、上記のRubyのものと同じにする。
ValueNetworkプロトコル
まず、強化学習の関数近似に使うのがニューラルネットワークでもHMEでもいいように、インタフェースとなるプロトコルを定義する:
//==============================
// ValueNetwork
//------------------------------
// ValueNetwork.swift
//==============================

import Foundation

// Interface for a value-function approximator used in reinforcement learning,
// so that different implementations (a neural network, an HME, ...) can be
// swapped behind the same API.
protocol ValueNetwork {
    // Returns the approximated value for the given input vector.
    func getValue(input: Vector) -> Double
    // Returns the value together with the gradient of the output with
    // respect to the network's weights (for gradient-based learning).
    func getValueAndWeightGradient(input: Vector) -> (Double, Weight)
    // Returns the value the network WOULD produce if `weightDiff * scale`
    // were added to the current weights (the weights are not modified).
    func getValue(input: Vector, withWeightDiff weightDiff: Weight, scale: Double) -> Double
    // Permanently adds `weightDiff` to the current weights.
    func addWeight(weightDiff: Weight)
}
定義の中でWeight
という型が使われてるけど、これは次に定義するプロトコルで、重みの内部構成を隠蔽するためのインタフェース。
Weightプロトコル
ということで、Weightプロトコルの定義:
//==============================
// ValueNetwork
//------------------------------
// Weight.swift
//==============================

import Foundation

// Opaque handle for a network's whole weight set. The concrete layout is
// hidden behind this protocol; the only operations a learner needs are
// scaling, addition, and subtraction.
protocol Weight {
    func scale(scalar: Double) -> Weight
    func add(other: Weight) -> Weight
    func subtract(other: Weight) -> Weight
}

// Operator sugar so that weight arithmetic reads like ordinary math
// (e.g. `w + g * alpha`).

func +(lhs: Weight, rhs: Weight) -> Weight {
    return lhs.add(rhs)
}

func -(lhs: Weight, rhs: Weight) -> Weight {
    return lhs.subtract(rhs)
}

func *(lhs: Weight, rhs: Double) -> Weight {
    return lhs.scale(rhs)
}

func *(lhs: Double, rhs: Weight) -> Weight {
    // Scalar-on-the-left form; multiplication is commutative here.
    return rhs.scale(lhs)
}
重みに対して行いたいことは、足し算(と引き算)と掛け算なので、それらをインタフェースとして定義している。
また、記述が簡単になるように、演算子のオーバーロードも行っている。
ValueNNクラス
そして、強化学習用のニューラルネットワークの実装。
ValueNNクラスとして実装した:
//==============================
// ValueNetwork
//------------------------------
// ValueNN.swift
//==============================

import Foundation

// A one-hidden-layer neural network used as a value-function approximator.
// Hidden units use a leaky-ReLU-style activation; the output unit is linear
// inside [outputMin, outputMax] and leaky outside that band, softly clamping
// the value estimate to the expected range.
class ValueNN: ValueNetwork {
    // Concrete weight container for ValueNN, hidden behind the Weight
    // protocol. Holds all four weight components of the network.
    class NNWeight: Weight {
        private let hiddenLayerWeight: Matrix   // hiddenUnitSize x inputSize
        private let hiddenLayerBias: Vector     // hiddenUnitSize
        private let outputLayerWeight: Vector   // hiddenUnitSize
        private let outputLayerBias: Double

        private init(hiddenLayerWeight: Matrix, hiddenLayerBias: Vector, outputLayerWeight: Vector, outputLayerBias: Double) {
            self.hiddenLayerWeight = hiddenLayerWeight
            self.hiddenLayerBias = hiddenLayerBias
            self.outputLayerWeight = outputLayerWeight
            self.outputLayerBias = outputLayerBias
        }

        // Component-wise scaling by a scalar.
        func scale(scalar: Double) -> Weight {
            let hiddenLayerWeight = self.hiddenLayerWeight * scalar
            let hiddenLayerBias = self.hiddenLayerBias * scalar
            let outputLayerWeight = self.outputLayerWeight * scalar
            let outputLayerBias = self.outputLayerBias * scalar
            return NNWeight(hiddenLayerWeight: hiddenLayerWeight,
                            hiddenLayerBias: hiddenLayerBias,
                            outputLayerWeight: outputLayerWeight,
                            outputLayerBias: outputLayerBias)
        }

        // Component-wise addition. `other` must be an NNWeight; mixing
        // Weight implementations is a programmer error, hence the forced cast.
        func add(other: Weight) -> Weight {
            let otherNNWeight = other as! NNWeight
            let hiddenLayerWeight = self.hiddenLayerWeight + otherNNWeight.hiddenLayerWeight
            let hiddenLayerBias = self.hiddenLayerBias + otherNNWeight.hiddenLayerBias
            let outputLayerWeight = self.outputLayerWeight + otherNNWeight.outputLayerWeight
            let outputLayerBias = self.outputLayerBias + otherNNWeight.outputLayerBias
            return NNWeight(hiddenLayerWeight: hiddenLayerWeight,
                            hiddenLayerBias: hiddenLayerBias,
                            outputLayerWeight: outputLayerWeight,
                            outputLayerBias: outputLayerBias)
        }

        // Component-wise subtraction; same NNWeight requirement as add(_:).
        func subtract(other: Weight) -> Weight {
            let otherNNWeight = other as! NNWeight
            let hiddenLayerWeight = self.hiddenLayerWeight - otherNNWeight.hiddenLayerWeight
            let hiddenLayerBias = self.hiddenLayerBias - otherNNWeight.hiddenLayerBias
            let outputLayerWeight = self.outputLayerWeight - otherNNWeight.outputLayerWeight
            let outputLayerBias = self.outputLayerBias - otherNNWeight.outputLayerBias
            return NNWeight(hiddenLayerWeight: hiddenLayerWeight,
                            hiddenLayerBias: hiddenLayerBias,
                            outputLayerWeight: outputLayerWeight,
                            outputLayerBias: outputLayerBias)
        }
    }

    // Activation slopes: 1.0 inside the "linear" region, 0.1 outside
    // (a leaky slope that keeps gradients nonzero everywhere).
    private static let activationNormalGradient: Double = 1.0
    private static let activationLesserGradient: Double = 0.1

    private var weight: NNWeight
    let outputMin: Double
    let outputMax: Double

    // Initializes the network with Gaussian random weights whose variance
    // is 1 / (fanIn + 1) for each layer (a common scaling to keep the
    // initial weighted inputs at a reasonable magnitude).
    init(inputSize: Int, hiddenUnitSize: Int, outputMin: Double, outputMax: Double) {
        self.outputMin = outputMin
        self.outputMax = outputMax
        let hiddenLayerWeightVariance = 1.0 / (Double(inputSize) + 1.0)
        let hiddenLayerWeightGenerator = NormalDistRandom.init(expected: 0.0, variance: hiddenLayerWeightVariance)
        let hiddenLayerWeightBuffer = (0..<hiddenUnitSize).map { _ in
            return (0..<inputSize).map { _ in
                return hiddenLayerWeightGenerator.getRandom()
            }
        }
        let hiddenLayerWeight = Matrix.fromArray(hiddenLayerWeightBuffer)
        let hiddenLayerBiasBuffer = (0..<hiddenUnitSize).map { _ in
            return hiddenLayerWeightGenerator.getRandom()
        }
        let hiddenLayerBias = Vector.fromArray(hiddenLayerBiasBuffer)
        let outputLayerWeightVariance = 1.0 / (Double(hiddenUnitSize) + 1.0)
        let outputLayerWeightGenerator = NormalDistRandom(expected: 0.0, variance: outputLayerWeightVariance)
        let outputLayerWeightBuffer = (0..<hiddenUnitSize).map { _ in
            return outputLayerWeightGenerator.getRandom()
        }
        let outputLayerWeight = Vector.fromArray(outputLayerWeightBuffer)
        let outputLayerBias = outputLayerWeightGenerator.getRandom()
        self.weight = NNWeight(hiddenLayerWeight: hiddenLayerWeight,
                               hiddenLayerBias: hiddenLayerBias,
                               outputLayerWeight: outputLayerWeight,
                               outputLayerBias: outputLayerBias)
    }

    // Forward pass: hidden = act(W*x + b), output = act(w . hidden + b').
    // NOTE(review): `+*` appears to be the inner (dot) product operator from
    // the companion Matrix/Vector library — confirm against its definition.
    func getValue(input: Vector) -> Double {
        let hiddenLayerWeightedInput = ((self.weight.hiddenLayerWeight * input) as! Vector) + self.weight.hiddenLayerBias
        let hiddenLayerOutput = hiddenLayerWeightedInput.map { [unowned self] (weightedInput: Double) in
            return self.hiddenLayerOutputForWeightedInput(weightedInput)
        }
        let outputLayerWeightedInput = self.weight.outputLayerWeight +* hiddenLayerOutput + self.weight.outputLayerBias
        let outputLayerOutput = self.outputLayerOutputForWeightedInput(outputLayerWeightedInput)
        return outputLayerOutput
    }

    // Forward pass plus backpropagation: returns the output and the gradient
    // of the output with respect to every weight, packaged as an NNWeight.
    // NOTE(review): `<*>` appears to be element-wise multiplication and `*+`
    // an outer product (vector x vector -> matrix) — confirm against the
    // Matrix/Vector library.
    func getValueAndWeightGradient(input: Vector) -> (Double, Weight) {
        let hiddenLayerWeightedInput = ((self.weight.hiddenLayerWeight * input) as! Vector) + self.weight.hiddenLayerBias
        let hiddenLayerOutput = hiddenLayerWeightedInput.map { [unowned self] (weightedInput: Double) in
            return self.hiddenLayerOutputForWeightedInput(weightedInput)
        }
        let hiddenLayerGradient = hiddenLayerWeightedInput.map { [unowned self] (weightedInput: Double) in
            return self.hiddenLayerGradientForWeightedInput(weightedInput)
        }
        let outputLayerWeightedInput = self.weight.outputLayerWeight +* hiddenLayerOutput + self.weight.outputLayerBias
        let outputLayerOutput = self.outputLayerOutputForWeightedInput(outputLayerWeightedInput)
        let outputLayerGradient = self.outputLayerGradientForWeightedInput(outputLayerWeightedInput)
        // Backpropagate: delta terms, then per-layer weight/bias gradients.
        let outputLayerDelta = outputLayerGradient
        let hiddenLayerDelta = outputLayerDelta * self.weight.outputLayerWeight <*> hiddenLayerGradient
        let hiddenLayerWeightGradient = hiddenLayerDelta *+ input
        let hiddenLayerBiasGradient = hiddenLayerDelta
        let outputLayerWeightGradient = outputLayerDelta * hiddenLayerOutput
        let outputLayerBiasGradient = outputLayerDelta  // fixed typo: was `outputLayerBiasGradint`
        let weightGradient = NNWeight(hiddenLayerWeight: hiddenLayerWeightGradient,
                                      hiddenLayerBias: hiddenLayerBiasGradient,
                                      outputLayerWeight: outputLayerWeightGradient,
                                      outputLayerBias: outputLayerBiasGradient)
        return (outputLayerOutput, weightGradient)
    }

    // Forward pass using a hypothetical weight set (current + weightDiff*scale)
    // without mutating the stored weights — useful for line search.
    func getValue(input: Vector, withWeightDiff weightDiff: Weight, scale: Double) -> Double {
        let newWeight = (self.weight + weightDiff * scale) as! NNWeight
        let hiddenLayerWeightedInput = ((newWeight.hiddenLayerWeight * input) as! Vector) + newWeight.hiddenLayerBias
        let hiddenLayerOutput = hiddenLayerWeightedInput.map { [unowned self] (weightedInput: Double) in
            return self.hiddenLayerOutputForWeightedInput(weightedInput)
        }
        let outputLayerWeightedInput = newWeight.outputLayerWeight +* hiddenLayerOutput + newWeight.outputLayerBias
        let outputLayerOutput = self.outputLayerOutputForWeightedInput(outputLayerWeightedInput)
        return outputLayerOutput
    }

    // Applies a weight update in place.
    func addWeight(weightDiff: Weight) {
        self.weight = (self.weight + weightDiff) as! NNWeight
    }

    // Hidden activation: slope 1.0 for non-negative input, 0.1 otherwise
    // (leaky-ReLU shape).
    private func hiddenLayerOutputForWeightedInput(weightedInput: Double) -> Double {
        if weightedInput >= 0.0 {
            return ValueNN.activationNormalGradient * weightedInput
        } else {
            return ValueNN.activationLesserGradient * weightedInput
        }
    }

    // Derivative of the hidden activation.
    private func hiddenLayerGradientForWeightedInput(weightedInput: Double) -> Double {
        if weightedInput >= 0.0 {
            return ValueNN.activationNormalGradient
        } else {
            return ValueNN.activationLesserGradient
        }
    }

    // Output activation: identity inside [outputMin, outputMax]; outside the
    // band the slope drops to 0.1, with the constant term chosen so the
    // pieces join continuously at the boundaries.
    private func outputLayerOutputForWeightedInput(weightedInput: Double) -> Double {
        if weightedInput < self.outputMin {
            return (ValueNN.activationLesserGradient * weightedInput
                + (ValueNN.activationNormalGradient - ValueNN.activationLesserGradient) * self.outputMin)
        } else if weightedInput < self.outputMax {
            return ValueNN.activationNormalGradient * weightedInput
        } else {
            return (ValueNN.activationLesserGradient * weightedInput
                + (ValueNN.activationNormalGradient - ValueNN.activationLesserGradient) * self.outputMax)
        }
    }

    // Derivative of the output activation.
    private func outputLayerGradientForWeightedInput(weightedInput: Double) -> Double {
        if (weightedInput < self.outputMin) || (self.outputMax < weightedInput) {
            return ValueNN.activationLesserGradient
        } else {
            return ValueNN.activationNormalGradient
        }
    }
}
細かく説明しないけど、強化学習用のニューラルネットワークをSwiftで書いてみた。(その2) - いものやま。に書いた行列計算をそのまま実装している。
やっている計算はRubyのものと一緒なんだけど、Matrixクラス、Vectorクラスを定義してあるので、(比較的)スッキリした実装になっている。
動作確認
動作確認として、Rubyのときと同様に、以下のコードを書いた:
//==============================
// ValueNetwork
//------------------------------
// main.swift
//
// Test code for ValueNetwork
//==============================

import Foundation

// ValueNN
// Build a small network (3 inputs, 10 hidden units, output clamped to [-1, 1]).
let valueNN = ValueNN(inputSize: 3, hiddenUnitSize: 10, outputMin: -1.0, outputMax: 1.0)
// Seven binary test inputs, stored as columns of a 3x7 matrix.
let inputMatrix = Matrix.fromArray([[1.0, 1.0, 1.0], [1.0, 1.0, 0.0], [1.0, 0.0, 1.0], [0.0, 1.0, 1.0], [1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]).transpose()
for col in (0..<inputMatrix.col) {
    let input = inputMatrix.colVector(col)
    // Output and the gradient of the output w.r.t. the weights at this input.
    let (output, weightGradient) = valueNN.getValueAndWeightGradient(input)
    let outputWithWeightGradient = valueNN.getValue(input, withWeightDiff: weightGradient, scale: 1.0)
    var diff = outputWithWeightGradient - output
    // Bisection search for a step scale such that applying
    // `weightGradient * scale` changes the output by roughly 1.0
    // (i.e. diff lands in [0.9, 1.1]) — a sanity check that the
    // gradient points in the direction of increasing output.
    var scale = 1.0
    var upperBound: Double! = nil
    var lowerBound: Double! = nil
    var last = 100
    for i in (0..<100) {
        if (diff < 0.0) || (1.1 < diff) {
            // Step too large (or overshoot): shrink, or bisect if bracketed.
            upperBound = scale
            scale = (lowerBound == nil) ? scale / 2.0 : (upperBound + lowerBound) / 2.0
        } else if diff < 0.9 {
            // Step too small: grow, or bisect if bracketed.
            lowerBound = scale
            scale = (upperBound == nil) ? scale * 2.0 : (upperBound + lowerBound) / 2.0
        } else {
            // diff in [0.9, 1.1]: record iteration count and stop.
            last = i
            break
        }
        let outputWithScaledWeightGradient = valueNN.getValue(input, withWeightDiff: weightGradient, scale: scale)
        diff = outputWithScaledWeightGradient - output
    }
    // With a 10x smaller step the output change should also shrink.
    let outputWith01Scaled = valueNN.getValue(input, withWeightDiff: weightGradient, scale: 0.1 * scale)
    let diffWith01Scaled = outputWith01Scaled - output
    print("input: \(input.transpose()), output: \(output)")
    print(" scale: \(scale), iterations: \(last)")
    print(" diff (scaled): \(diff), diff (0.1*scaled): \(diffWith01Scaled)")
    // Permanently apply the small step and confirm the realized change
    // matches the prediction made without mutating the weights.
    let weightDiff = weightGradient * 0.1 * scale
    valueNN.addWeight(weightDiff)
    let newOutput = valueNN.getValue(input)
    let newDiff = newOutput - output
    print(" new output: \(newOutput), diff: \(newDiff)")
}
// (rest omitted)
実行例は、以下:
---------- ValueNN ---------- input: [1.0, 1.0, 1.0], output: 0.131021133192784 scale: 0.125, iterations: 3 diff (scaled): 0.939252246852243, diff (0.1*scaled): 0.155481391989248 new output: 0.286502525182032, diff: 0.155481391989248 input: [1.0, 1.0, 0.0], output: 0.363438002291251 scale: 0.5, iterations: 1 diff (scaled): 0.92473632645851, diff (0.1*scaled): 0.331184714810343 new output: 0.694622717101594, diff: 0.331184714810343 input: [1.0, 0.0, 1.0], output: 0.0041779249095818 scale: 0.125, iterations: 3 diff (scaled): 1.03338785763323, diff (0.1*scaled): 0.13662619759527 new output: 0.140804122504852, diff: 0.13662619759527 input: [0.0, 1.0, 1.0], output: 0.257684625662284 scale: 0.5, iterations: 1 diff (scaled): 0.992301758621281, diff (0.1*scaled): 0.335977958448971 new output: 0.593662584111254, diff: 0.335977958448971 input: [1.0, 0.0, 0.0], output: 0.184531558718531 scale: 0.5, iterations: 1 diff (scaled): 1.01382568012975, diff (0.1*scaled): 0.27650775017151 new output: 0.461039308890041, diff: 0.27650775017151 input: [0.0, 1.0, 0.0], output: 0.166668276265283 scale: 0.5, iterations: 1 diff (scaled): 0.974321711025463, diff (0.1*scaled): 0.217514514547388 new output: 0.384182790812671, diff: 0.217514514547388 input: [0.0, 0.0, 1.0], output: 0.180236823017496 scale: 0.5, iterations: 1 diff (scaled): 0.945552060789391, diff (0.1*scaled): 0.200679893784258 new output: 0.380916716801753, diff: 0.200679893784258 # 以下略
今日はここまで!