昨日は強化学習用のHMEの計算を行列で表現した。
今日はそれを使って実際に実装していく。
なお、Rubyでの実装は、以下を参照:
GateNetworkクラス
まずはゲートネットワークの実装。
//==============================
// ValueNetwork
//------------------------------
// GateNetwork.swift
//==============================

import Foundation
import Accelerate

// Gate network for a mixture of experts: a single linear layer followed by
// a softmax, producing a weighting over the experts.
class GateNetwork {
    // Wraps the gate's weight matrix so it can participate in the project's
    // generic Weight arithmetic (scale / add / subtract).
    class GateWeight: Weight {
        private let weight: Matrix

        private init(weight: Matrix) {
            self.weight = weight
        }

        // Returns a new weight scaled by `scalar`.
        func scale(scalar: Double) -> Weight {
            let weight = self.weight * scalar
            return GateWeight(weight: weight)
        }

        // Element-wise sum; `other` must be a GateWeight.
        func add(other: Weight) -> Weight {
            let otherWeight = other as! GateWeight
            let weight = self.weight + otherWeight.weight
            return GateWeight(weight: weight)
        }

        // Element-wise difference; `other` must be a GateWeight.
        func subtract(other: Weight) -> Weight {
            let otherWeight = other as! GateWeight
            let weight = self.weight - otherWeight.weight
            return GateWeight(weight: weight)
        }
    }

    private var weight: GateWeight

    // Starts with an all-zero weight matrix, i.e. a uniform gate output.
    init(inputSize: Int, outputSize: Int) {
        let weight = Matrix.filledWith(0.0, row: outputSize, col: inputSize)
        self.weight = GateWeight(weight: weight)
    }

    // Numerically stable softmax: shift by the max before exponentiating
    // (avoids overflow in exp), then L1-normalize so the outputs sum to 1.
    // Shared by both getValue variants, which previously duplicated it.
    private func softmax(weightedInput: Vector) -> Vector {
        let maxValue: Double = weightedInput.toArray().maxElement()!
        let shifted = weightedInput - Vector.filledWith(maxValue, size: weightedInput.size)
        let exponentiated = shifted.map(exp)
        return exponentiated.normalizedVector(Int(LA_L1_NORM))
    }

    // Gate output: softmax(W * input) using the current weight.
    func getValue(input: Vector) -> Vector {
        let weightedInput = (self.weight.weight * input) as! Vector
        return self.softmax(weightedInput)
    }

    // Returns the gate output together with the gradient of the
    // gate-weighted value with respect to the gate weight matrix.
    func getValueAndWeightGradient(input: Vector, expertOutput: Vector) -> (Vector, Weight) {
        let output = self.getValue(input)
        // Softmax derivative term. NOTE(review): `*+` and `<*>` come from the
        // project's matrix library (presumably outer product and element-wise
        // product) — confirm their definitions there.
        let outputMatrix = output *+ Vector.filledWith(1.0, size: output.size)
        let gradientMatrix = outputMatrix <*> (Matrix.identityMatrix(output.size) - outputMatrix)
        let weightGradientMatrix = ((gradientMatrix * expertOutput) as! Vector) *+ input
        let weightGradient = GateWeight(weight: weightGradientMatrix)
        return (output, weightGradient)
    }

    // Gate output when the current weight is perturbed by
    // `weightDiff * scale`; the stored weight itself is not modified.
    func getValue(input: Vector, withWeightDiff weightDiff: Weight, scale: Double) -> Vector {
        let newWeight = (self.weight + weightDiff * scale) as! GateWeight
        let weightedInput = (newWeight.weight * input) as! Vector
        return self.softmax(weightedInput)
    }

    // Permanently applies `weightDiff` to the stored weight.
    func addWeight(weightDiff: Weight) {
        self.weight = (self.weight + weightDiff) as! GateWeight
    }
}
細かい説明はしないけど、やってる計算は強化学習用のニューラルネットワークをSwiftで書いてみた。(その4) - いものやま。に書いたとおり。
ValueHMEクラス
そして、強化学習用のHMEの実装。
//==============================
// ValueNetwork
//------------------------------
// ValueHME.swift
//==============================

import Foundation

// Hierarchical Mixture of Experts (HME) value network for reinforcement
// learning: a GateNetwork softly selects among expert value networks, and
// the final value is the gate-weighted sum of the expert values.
class ValueHME: ValueNetwork {
    // Aggregate weight: one Weight per expert plus the gate weight, so a
    // whole HME gradient step can be scaled/added/subtracted as one unit.
    class HMEWeight: Weight {
        private let expertWeight: [Weight]
        private let gateWeight: Weight

        private init(expertWeight: [Weight], gateWeight: Weight) {
            self.expertWeight = expertWeight
            self.gateWeight = gateWeight
        }

        // Scales every component (experts and gate) by `scalar`.
        func scale(scalar: Double) -> Weight {
            let expertWeight = self.expertWeight.map {
                (weight: Weight) in
                return weight * scalar
            }
            let gateWeight = self.gateWeight * scalar
            return HMEWeight(expertWeight: expertWeight, gateWeight: gateWeight)
        }

        // Component-wise sum; `other` must be an HMEWeight with the same
        // number of experts (zip silently truncates on mismatch).
        func add(other: Weight) -> Weight {
            let otherWeight = other as! HMEWeight
            let expertWeight = zip(self.expertWeight, otherWeight.expertWeight).map {
                (selfExpertWeight: Weight, otherExpertWeight: Weight) in
                return selfExpertWeight + otherExpertWeight
            }
            let gateWeight = self.gateWeight + otherWeight.gateWeight
            return HMEWeight(expertWeight: expertWeight, gateWeight: gateWeight)
        }

        // Component-wise difference; same expert-count requirement as add.
        func subtract(other: Weight) -> Weight {
            let otherWeight = other as! HMEWeight
            let expertWeight = zip(self.expertWeight, otherWeight.expertWeight).map {
                (selfExpertWeight: Weight, otherExpertWeight: Weight) in
                return selfExpertWeight - otherExpertWeight
            }
            let gateWeight = self.gateWeight - otherWeight.gateWeight
            return HMEWeight(expertWeight: expertWeight, gateWeight: gateWeight)
        }
    }

    // Builds an HME from a nested structure description: an Int entry
    // becomes a ValueNN expert with that many hidden units; a nested array
    // becomes a lower-level HME (recursively).
    // NOTE(review): entries that are neither Int nor [AnyObject] are
    // silently ignored.
    class func create(inputSize: Int, outputMin: Double, outputMax: Double, structure: [AnyObject]) -> ValueHME {
        var experts: [ValueNetwork] = []
        for item in structure {
            if let hiddenUnitSize = item as? Int {
                let valueNN = ValueNN(inputSize: inputSize, hiddenUnitSize: hiddenUnitSize, outputMin: outputMin, outputMax: outputMax)
                experts.append(valueNN)
            } else if let lowerStructure = item as? [AnyObject] {
                let lowerHME = ValueHME.create(inputSize, outputMin: outputMin, outputMax: outputMax, structure: lowerStructure)
                experts.append(lowerHME)
            }
        }
        return ValueHME(inputSize: inputSize, experts: experts)
    }

    private let experts: [ValueNetwork]
    private let gateNetwork: GateNetwork

    // The gate has one output per expert.
    init(inputSize: Int, experts: [ValueNetwork]) {
        self.experts = experts
        self.gateNetwork = GateNetwork(inputSize: inputSize, outputSize: experts.count)
    }

    // Value = gate output (softmax weights) combined with the expert values
    // via `+*` (returns a Double, so this is the inner product).
    func getValue(input: Vector) -> Double {
        let expertOutputArray = self.experts.map {
            (expert: ValueNetwork) in
            return expert.getValue(input)
        }
        let expertOutput = Vector.fromArray(expertOutputArray)
        let gateOutput = self.gateNetwork.getValue(input)
        return gateOutput +* expertOutput
    }

    // Returns the value together with the gradient of the value with
    // respect to all component weights.
    func getValueAndWeightGradient(input: Vector) -> (Double, Weight) {
        var expertOutputArray: [Double] = []
        var expertWeightGradientArray: [Weight] = []
        for expert in self.experts {
            let (output, weightGradient) = expert.getValueAndWeightGradient(input)
            expertOutputArray.append(output)
            expertWeightGradientArray.append(weightGradient)
        }
        let expertOutput = Vector.fromArray(expertOutputArray)
        let (gateOutput, gateWeightGradient) = self.gateNetwork.getValueAndWeightGradient(input, expertOutput: expertOutput)
        // Each expert's gradient is scaled by that expert's gate weight.
        let expertWeightGradient = zip(gateOutput.toArray(), expertWeightGradientArray).map {
            (gateOutput: Double, expertWeightGradient: Weight) in
            return gateOutput * expertWeightGradient
        }
        let output = gateOutput +* expertOutput
        let weightGradient = HMEWeight(expertWeight: expertWeightGradient, gateWeight: gateWeightGradient)
        return (output, weightGradient)
    }

    // Value when every component weight is perturbed by its part of
    // `weightDiff * scale`; stored weights are not modified.
    func getValue(input: Vector, withWeightDiff weightDiff: Weight, scale: Double) -> Double {
        let hmeWeightDiff = weightDiff as! HMEWeight
        let expertOutputArray = zip(self.experts, hmeWeightDiff.expertWeight).map {
            (expert: ValueNetwork, expertWeightDiff: Weight) in
            return expert.getValue(input, withWeightDiff: expertWeightDiff, scale: scale)
        }
        let expertOutput = Vector.fromArray(expertOutputArray)
        let gateOutput = self.gateNetwork.getValue(input, withWeightDiff: hmeWeightDiff.gateWeight, scale: scale)
        return gateOutput +* expertOutput
    }

    // Permanently applies `weightDiff` to every expert and to the gate.
    func addWeight(weightDiff: Weight) {
        let hmeWeightDiff = weightDiff as! HMEWeight
        for (expert, expertWeightDiff) in zip(self.experts, hmeWeightDiff.expertWeight) {
            expert.addWeight(expertWeightDiff)
        }
        self.gateNetwork.addWeight(hmeWeightDiff.gateWeight)
    }
}
こちらも強化学習用のニューラルネットワークをSwiftで書いてみた。(その4) - いものやま。に書いたとおり。
動作確認
動作確認として、次のようなコードを書いた。
(ValueNNの動作確認のコードを修正し、GateNetwork、ValueHMEの動作確認を追加している)
//==============================
// ValueNetwork
//------------------------------
// main.swift
//
// Test code for ValueNetwork
//==============================

import Foundation

// All 8 binary patterns of 3 inputs, one pattern per column.
let inputMatrix = Matrix.fromArray([[1.0, 1.0, 1.0],
                                    [1.0, 1.0, 0.0],
                                    [1.0, 0.0, 1.0],
                                    [0.0, 1.0, 1.0],
                                    [1.0, 0.0, 0.0],
                                    [0.0, 1.0, 0.0],
                                    [0.0, 0.0, 1.0],
                                    [0.0, 0.0, 0.0]]).transpose()

// Gradient sanity check shared by ValueNN and ValueHME (both conform to
// ValueNetwork; this replaces two previously copy-pasted loops).
// Searches for a gradient scale where the value change lands in [0.9, 1.1]
// (doubling until bracketed, then bisecting), then permanently applies
// 0.1 * scale of the gradient and reports the resulting value change.
func checkGradient(network: ValueNetwork, input: Vector) {
    let (output, weightGradient) = network.getValueAndWeightGradient(input)
    let outputWithWeightGradient = network.getValue(input, withWeightDiff: weightGradient, scale: 1.0)
    var diff = outputWithWeightGradient - output
    var scale = 1.0
    var upperBound: Double! = nil
    var lowerBound: Double! = nil
    var last = 100
    for i in (0..<100) {
        if (diff < 0.0) || (1.1 < diff) {
            // Overshot (or diverged): current scale is an upper bound.
            upperBound = scale
            scale = (lowerBound == nil) ? scale / 2.0 : (upperBound + lowerBound) / 2.0
        } else if diff < 0.9 {
            // Undershot: current scale is a lower bound.
            lowerBound = scale
            scale = (upperBound == nil) ? scale * 2.0 : (upperBound + lowerBound) / 2.0
        } else {
            last = i
            break
        }
        let outputWithScaledWeightGradient = network.getValue(input, withWeightDiff: weightGradient, scale: scale)
        diff = outputWithScaledWeightGradient - output
    }
    let outputWith01Scaled = network.getValue(input, withWeightDiff: weightGradient, scale: 0.1 * scale)
    let diffWith01Scaled = outputWith01Scaled - output
    print("input: \(input.transpose()), output: \(output)")
    print(" scale: \(scale), iterations: \(last)")
    print(" diff (scaled): \(diff), diff (0.1*scaled): \(diffWith01Scaled)")
    let weightDiff = weightGradient * 0.1 * scale
    network.addWeight(weightDiff)
    let newOutput = network.getValue(input)
    let newDiff = newOutput - output
    print(" new output: \(newOutput), diff: \(newDiff)")
}

// ValueNN
print("----------")
print("ValueNN")
print("----------")
let valueNN = ValueNN(inputSize: 3, hiddenUnitSize: 10, outputMin: -1.0, outputMax: 1.0)
for col in (0..<inputMatrix.col) {
    checkGradient(valueNN, input: inputMatrix.colVector(col))
}

// GateNetwork
// (no scale search here: just check that the gradient increases the
// gate-weighted value and that addWeight matches the perturbed value)
print("----------")
print("GateNetwork")
print("----------")
let gateNetwork = GateNetwork(inputSize: 3, outputSize: 2)
let expertOutput = Vector.fromArray([0.2, 0.8])
for col in (0..<inputMatrix.col) {
    let input = inputMatrix.colVector(col)
    let (gateOutput, weightGradient) = gateNetwork.getValueAndWeightGradient(input, expertOutput: expertOutput)
    let output = gateOutput +* expertOutput
    let gateOutputWithWeightGradient = gateNetwork.getValue(input, withWeightDiff: weightGradient, scale: 1.0)
    let outputWithWeightGradient = gateOutputWithWeightGradient +* expertOutput
    let diff = outputWithWeightGradient - output
    print("input: \(input.transpose()), output: \(output)")
    print(" with gradient: \(outputWithWeightGradient), diff: \(diff)")
    gateNetwork.addWeight(weightGradient)
    let newGateOutput = gateNetwork.getValue(input)
    let newOutput = newGateOutput +* expertOutput
    let newDiff = newOutput - output
    print(" new output: \(newOutput), diff: \(newDiff)")
}

// ValueHME
print("----------")
print("ValueHME")
print("----------")
let valueHME = ValueHME.create(3, outputMin: -1.0, outputMax: 1.0, structure: [10, 10])
for col in (0..<inputMatrix.col) {
    checkGradient(valueHME, input: inputMatrix.colVector(col))
}
実行例は、以下:
---------- ValueNN ---------- input: [1.0, 1.0, 1.0], output: -0.183942077296883 scale: 0.1875, iterations: 4 diff (scaled): 1.01724963088052, diff (0.1*scaled): 0.105130155911477 new output: -0.0788119213854061, diff: 0.105130155911477 input: [1.0, 1.0, 0.0], output: -0.0727086141640791 scale: 0.25, iterations: 2 diff (scaled): 1.09166909286381, diff (0.1*scaled): 0.129576278870883 new output: 0.0568676647068041, diff: 0.129576278870883 input: [1.0, 0.0, 1.0], output: -0.383684591887409 scale: 0.25, iterations: 2 diff (scaled): 1.07005618759768, diff (0.1*scaled): 0.115689382429312 new output: -0.267995209458097, diff: 0.115689382429312 input: [0.0, 1.0, 1.0], output: 0.501642422844154 scale: 1.25, iterations: 3 diff (scaled): 1.03458420400201, diff (0.1*scaled): 0.463696492593989 new output: 0.965338915438143, diff: 0.463696492593989 input: [1.0, 0.0, 0.0], output: 0.0147913651058598 scale: 0.5, iterations: 1 diff (scaled): 1.09849345190749, diff (0.1*scaled): 0.224957607158544 new output: 0.239748972264404, diff: 0.224957607158544 input: [0.0, 1.0, 0.0], output: 1.00349356627074 scale: 18.0, iterations: 8 diff (scaled): 1.04838464458158, diff (0.1*scaled): 0.0657863751854824 new output: 1.06927994145622, diff: 0.0657863751854824 input: [0.0, 0.0, 1.0], output: 1.00445024397176 scale: 18.0, iterations: 8 diff (scaled): 1.02882994407428, diff (0.1*scaled): 0.070282826622567 new output: 1.07473307059433, diff: 0.070282826622567 input: [0.0, 0.0, 0.0], output: 1.04718514605304 scale: 20.0, iterations: 7 diff (scaled): 0.965905785136957, diff (0.1*scaled): 0.0690312508752593 new output: 1.1162163969283, diff: 0.0690312508752593 ---------- GateNetwork ---------- input: [1.0, 1.0, 1.0], output: 0.5 with gradient: 0.626569701575002, diff: 0.126569701575002 new output: 0.626569701575002, diff: 0.126569701575002 input: [1.0, 1.0, 0.0], output: 0.587393783735477 with gradient: 0.627638891939002, diff: 0.0402451082035251 new output: 0.627638891939002, diff: 
0.0402451082035251 input: [1.0, 0.0, 1.0], output: 0.60807465845349 with gradient: 0.637937581551879, diff: 0.0298629230983883 new output: 0.637937581551879, diff: 0.0298629230983883 input: [0.0, 1.0, 1.0], output: 0.623373727874239 with gradient: 0.645955025290896, diff: 0.022581297416657 new output: 0.645955025290896, diff: 0.022581297416657 input: [1.0, 0.0, 0.0], output: 0.583841706173601 with gradient: 0.605472969776552, diff: 0.0216312636029506 new output: 0.605472969776552, diff: 0.0216312636029506 input: [0.0, 1.0, 0.0], output: 0.580303255512027 with gradient: 0.602887409981527, diff: 0.0225841544694998 new output: 0.602887409981527, diff: 0.0225841544694998 input: [0.0, 0.0, 1.0], output: 0.575481724278274 with gradient: 0.59937319511023, diff: 0.0238914708319556 new output: 0.59937319511023, diff: 0.0238914708319556 input: [0.0, 0.0, 0.0], output: 0.5 with gradient: 0.5, diff: 0.0 new output: 0.5, diff: 0.0 ---------- ValueHME ---------- input: [1.0, 1.0, 1.0], output: 0.583369090800088 scale: 12.0, iterations: 5 diff (scaled): 0.955143642897197, diff (0.1*scaled): 0.55711728368868 new output: 1.14048637448877, diff: 0.55711728368868 input: [1.0, 1.0, 0.0], output: 1.17912976992144 scale: 10.0, iterations: 6 diff (scaled): 0.958244431157407, diff (0.1*scaled): 0.218163118461483 new output: 1.39729288838292, diff: 0.218163118461483 input: [1.0, 0.0, 1.0], output: 1.26053893688435 scale: 7.0, iterations: 5 diff (scaled): 1.00825100197074, diff (0.1*scaled): 0.100641423159278 new output: 1.36118036004363, diff: 0.100641423159278 input: [0.0, 1.0, 1.0], output: 1.25521721395406 scale: 7.0, iterations: 5 diff (scaled): 0.98711439450816, diff (0.1*scaled): 0.0774174523384112 new output: 1.33263466629247, diff: 0.0774174523384112 input: [1.0, 0.0, 0.0], output: 1.37228160440216 scale: 24.0, iterations: 6 diff (scaled): 0.926736515167874, diff (0.1*scaled): 0.151453564687681 new output: 1.52373516908985, diff: 0.151453564687681 input: [0.0, 1.0, 0.0], output: 
1.42285096816873 scale: 10.0, iterations: 6 diff (scaled): 0.995109500116182, diff (0.1*scaled): 0.0862919554937307 new output: 1.50914292366246, diff: 0.0862919554937307 input: [0.0, 0.0, 1.0], output: 1.33805920315436 scale: 7.0, iterations: 5 diff (scaled): 1.0092130994799, diff (0.1*scaled): 0.0818008367414018 new output: 1.41986003989576, diff: 0.0818008367414018 input: [0.0, 0.0, 0.0], output: 1.33449511265373 scale: 16.0, iterations: 4 diff (scaled): 0.947467726275787, diff (0.1*scaled): 0.0741106840380314 new output: 1.40860579669177, diff: 0.0741106840380314
これでValueNetworkの実装はOK、と言いたいところなんだけど、このままだとデータの保存や復帰が出来ないので、ちょっと困る。
なので、データの保存・復帰が出来るように修正していきたい。
今日はここまで!