昨日は強化学習用のHMEの計算を行列で表現した。
今日はそれを使って実際に実装していく。
なお、Rubyでの実装は、以下を参照:
GateNetworkクラス
まずはゲートネットワークの実装。
import Foundation
import Accelerate
/// Gate network for a hierarchical mixture of experts (HME).
/// Applies a linear transform to the input and turns the result into
/// mixing coefficients via a numerically stable softmax.
class GateNetwork {
    /// Wrapper that lets the gate's weight matrix participate in the
    /// generic `Weight` arithmetic used by the training code.
    class GateWeight: Weight {
        private let weight: Matrix

        private init(weight: Matrix) {
            self.weight = weight
        }

        /// Returns a new weight with every entry multiplied by `scalar`.
        func scale(scalar: Double) -> Weight {
            let weight = self.weight * scalar
            return GateWeight(weight: weight)
        }

        /// Returns the element-wise sum with `other` (must be a GateWeight).
        func add(other: Weight) -> Weight {
            let otherWeight = other as! GateWeight
            let weight = self.weight + otherWeight.weight
            return GateWeight(weight: weight)
        }

        /// Returns the element-wise difference with `other` (must be a GateWeight).
        func subtract(other: Weight) -> Weight {
            let otherWeight = other as! GateWeight
            let weight = self.weight - otherWeight.weight
            return GateWeight(weight: weight)
        }
    }

    private var weight: GateWeight

    /// Creates a gate with a zero-initialized (outputSize x inputSize) weight matrix.
    init(inputSize: Int, outputSize: Int) {
        let weight = Matrix.filledWith(0.0, row: outputSize, col: inputSize)
        self.weight = GateWeight(weight: weight)
    }

    /// Numerically stable softmax shared by both `getValue` variants
    /// (previously duplicated): shift by the maximum before exponentiating
    /// so `exp` cannot overflow, then L1-normalize so the entries sum to 1.
    private func softmax(weightedInput: Vector) -> Vector {
        let maxValue: Double = weightedInput.toArray().maxElement()!
        let shifted = weightedInput - Vector.filledWith(maxValue, size: weightedInput.size)
        return shifted.map(exp).normalizedVector(Int(LA_L1_NORM))
    }

    /// Returns the softmax mixing coefficients for `input` under the
    /// current weight.
    func getValue(input: Vector) -> Vector {
        return self.softmax((self.weight.weight * input) as! Vector)
    }

    /// Returns the gate output together with the gradient of the mixture
    /// value with respect to the gate's weight matrix.
    func getValueAndWeightGradient(input: Vector, expertOutput: Vector) -> (Vector, Weight) {
        let output = self.getValue(input)
        // Softmax derivative term built from the outer product (`*+`) and the
        // element-wise product (`<*>`) — presumably the Jacobian-style factor
        // derived in the accompanying article; confirm against that derivation.
        let outputMatrix = output *+ Vector.filledWith(1.0, size: output.size)
        let gradientMatrix = outputMatrix <*> (Matrix.identityMatrix(output.size) - outputMatrix)
        let weightGradientMatrix = ((gradientMatrix * expertOutput) as! Vector) *+ input
        let weightGradient = GateWeight(weight: weightGradientMatrix)
        return (output, weightGradient)
    }

    /// Returns the gate output as if `weightDiff * scale` had been added to
    /// the current weight, without mutating the stored weight.
    func getValue(input: Vector, withWeightDiff weightDiff: Weight, scale: Double) -> Vector {
        let newWeight = (self.weight + weightDiff * scale) as! GateWeight
        return self.softmax((newWeight.weight * input) as! Vector)
    }

    /// Permanently adds `weightDiff` to the gate's weight.
    func addWeight(weightDiff: Weight) {
        self.weight = (self.weight + weightDiff) as! GateWeight
    }
}
細かい説明はしないけど、やってる計算は強化学習用のニューラルネットワークをSwiftで書いてみた。(その4) - いものやま。に書いたとおり。
ValueHMEクラス
そして、強化学習用のHMEの実装。
import Foundation
/// Hierarchical mixture of experts (HME) for value estimation.
/// The value is the gate-weighted combination of several expert value
/// networks evaluated on the same input.
class ValueHME: ValueNetwork {
    /// Aggregate weight: one `Weight` per expert plus the gate's weight,
    /// supporting the generic `Weight` arithmetic component-wise.
    class HMEWeight: Weight {
        private let expertWeight: [Weight]
        private let gateWeight: Weight

        private init(expertWeight: [Weight], gateWeight: Weight) {
            self.expertWeight = expertWeight
            self.gateWeight = gateWeight
        }

        /// Scales every component weight by `scalar`.
        func scale(scalar: Double) -> Weight {
            let scaledExperts = self.expertWeight.map { (component: Weight) in component * scalar }
            return HMEWeight(expertWeight: scaledExperts, gateWeight: self.gateWeight * scalar)
        }

        /// Component-wise sum with `other` (must be an HMEWeight).
        func add(other: Weight) -> Weight {
            let that = other as! HMEWeight
            let summedExperts = zip(self.expertWeight, that.expertWeight).map {
                (a: Weight, b: Weight) in a + b
            }
            return HMEWeight(expertWeight: summedExperts, gateWeight: self.gateWeight + that.gateWeight)
        }

        /// Component-wise difference with `other` (must be an HMEWeight).
        func subtract(other: Weight) -> Weight {
            let that = other as! HMEWeight
            let subtractedExperts = zip(self.expertWeight, that.expertWeight).map {
                (a: Weight, b: Weight) in a - b
            }
            return HMEWeight(expertWeight: subtractedExperts, gateWeight: self.gateWeight - that.gateWeight)
        }
    }

    /// Builds an HME from a nested structure description: an `Int` entry
    /// creates a `ValueNN` expert with that many hidden units, while a
    /// nested array recursively creates a lower-level HME.
    class func create(inputSize: Int, outputMin: Double, outputMax: Double, structure: [AnyObject]) -> ValueHME {
        var experts: [ValueNetwork] = []
        for item in structure {
            switch item {
            case let hiddenUnitSize as Int:
                experts.append(ValueNN(inputSize: inputSize,
                                       hiddenUnitSize: hiddenUnitSize,
                                       outputMin: outputMin,
                                       outputMax: outputMax))
            case let lowerStructure as [AnyObject]:
                experts.append(ValueHME.create(inputSize,
                                               outputMin: outputMin,
                                               outputMax: outputMax,
                                               structure: lowerStructure))
            default:
                // Entries of any other type are silently skipped, as before.
                break
            }
        }
        return ValueHME(inputSize: inputSize, experts: experts)
    }

    private let experts: [ValueNetwork]
    private let gateNetwork: GateNetwork

    init(inputSize: Int, experts: [ValueNetwork]) {
        self.experts = experts
        self.gateNetwork = GateNetwork(inputSize: inputSize, outputSize: experts.count)
    }

    /// Value = gate output combined with the expert outputs via `+*`.
    func getValue(input: Vector) -> Double {
        var valuePerExpert: [Double] = []
        for expert in self.experts {
            valuePerExpert.append(expert.getValue(input))
        }
        let expertOutput = Vector.fromArray(valuePerExpert)
        let gateOutput = self.gateNetwork.getValue(input)
        return gateOutput +* expertOutput
    }

    /// Returns the mixture value together with the gradient of that value
    /// with respect to all weights (each expert's and the gate's).
    func getValueAndWeightGradient(input: Vector) -> (Double, Weight) {
        let expertResults = self.experts.map {
            (expert: ValueNetwork) in expert.getValueAndWeightGradient(input)
        }
        var valuePerExpert: [Double] = []
        var gradientPerExpert: [Weight] = []
        for (value, gradient) in expertResults {
            valuePerExpert.append(value)
            gradientPerExpert.append(gradient)
        }
        let expertOutput = Vector.fromArray(valuePerExpert)
        let (gateOutput, gateWeightGradient) = self.gateNetwork.getValueAndWeightGradient(input, expertOutput: expertOutput)
        // Each expert's gradient is attenuated by its gate activation.
        let expertWeightGradient = zip(gateOutput.toArray(), gradientPerExpert).map {
            (activation: Double, gradient: Weight) in activation * gradient
        }
        let weightGradient = HMEWeight(expertWeight: expertWeightGradient, gateWeight: gateWeightGradient)
        return (gateOutput +* expertOutput, weightGradient)
    }

    /// Evaluates the mixture as if `weightDiff * scale` had been applied,
    /// without mutating any stored weights.
    func getValue(input: Vector, withWeightDiff weightDiff: Weight, scale: Double) -> Double {
        let hmeWeightDiff = weightDiff as! HMEWeight
        var valuePerExpert: [Double] = []
        for (expert, expertWeightDiff) in zip(self.experts, hmeWeightDiff.expertWeight) {
            valuePerExpert.append(expert.getValue(input, withWeightDiff: expertWeightDiff, scale: scale))
        }
        let expertOutput = Vector.fromArray(valuePerExpert)
        let gateOutput = self.gateNetwork.getValue(input, withWeightDiff: hmeWeightDiff.gateWeight, scale: scale)
        return gateOutput +* expertOutput
    }

    /// Permanently applies `weightDiff` to every expert and to the gate.
    func addWeight(weightDiff: Weight) {
        let hmeWeightDiff = weightDiff as! HMEWeight
        zip(self.experts, hmeWeightDiff.expertWeight).forEach {
            (expert: ValueNetwork, expertWeightDiff: Weight) in
            expert.addWeight(expertWeightDiff)
        }
        self.gateNetwork.addWeight(hmeWeightDiff.gateWeight)
    }
}
こちらも強化学習用のニューラルネットワークをSwiftで書いてみた。(その4) - いものやま。に書いたとおり。
動作確認
動作確認として、次のようなコードを書いた。
(ValueNNの動作確認のコードを修正し、GateNetwork、ValueHMEの動作確認を追加している)
import Foundation
// All 8 binary inputs of length 3, one per column after the transpose.
let inputMatrix = Matrix.fromArray([[1.0, 1.0, 1.0],
                                    [1.0, 1.0, 0.0],
                                    [1.0, 0.0, 1.0],
                                    [0.0, 1.0, 1.0],
                                    [1.0, 0.0, 0.0],
                                    [0.0, 1.0, 0.0],
                                    [0.0, 0.0, 1.0],
                                    [0.0, 0.0, 0.0]]).transpose()

/// Gradient sanity check shared by ValueNN and ValueHME (the two loops were
/// previously duplicated verbatim): for each input column, bisect for a
/// gradient scale that moves the output by roughly +1, then apply a tenth
/// of that step and report the resulting change.
func checkValueNetwork(network: ValueNetwork, inputMatrix: Matrix) {
    for col in (0..<inputMatrix.col) {
        let input = inputMatrix.colVector(col)
        let (output, weightGradient) = network.getValueAndWeightGradient(input)
        let outputWithWeightGradient = network.getValue(input, withWeightDiff: weightGradient, scale: 1.0)
        var diff = outputWithWeightGradient - output
        var scale = 1.0
        var upperBound: Double! = nil
        var lowerBound: Double! = nil
        var last = 100  // stays 100 if bisection never lands in [0.9, 1.1]
        for i in (0..<100) {
            if (diff < 0.0) || (1.1 < diff) {
                // Overshot (or moved the wrong way): shrink the scale.
                upperBound = scale
                scale = (lowerBound == nil) ? scale / 2.0 : (upperBound + lowerBound) / 2.0
            } else if diff < 0.9 {
                // Undershot: grow the scale.
                lowerBound = scale
                scale = (upperBound == nil) ? scale * 2.0 : (upperBound + lowerBound) / 2.0
            } else {
                // diff landed in [0.9, 1.1]: close enough to +1.
                last = i
                break
            }
            let outputWithScaledWeightGradient = network.getValue(input, withWeightDiff: weightGradient, scale: scale)
            diff = outputWithScaledWeightGradient - output
        }
        let outputWith01Scaled = network.getValue(input, withWeightDiff: weightGradient, scale: 0.1 * scale)
        let diffWith01Scaled = outputWith01Scaled - output
        print("input: \(input.transpose()), output: \(output)")
        print(" scale: \(scale), iterations: \(last)")
        print(" diff (scaled): \(diff), diff (0.1*scaled): \(diffWith01Scaled)")
        // Apply the 0.1-scaled step for real and confirm the output moved
        // by the same amount as the non-mutating preview.
        let weightDiff = weightGradient * 0.1 * scale
        network.addWeight(weightDiff)
        let newOutput = network.getValue(input)
        let newDiff = newOutput - output
        print(" new output: \(newOutput), diff: \(newDiff)")
    }
}

print("----------")
print("ValueNN")
print("----------")
let valueNN = ValueNN(inputSize: 3, hiddenUnitSize: 10, outputMin: -1.0, outputMax: 1.0)
checkValueNetwork(valueNN, inputMatrix: inputMatrix)

print("----------")
print("GateNetwork")
print("----------")
// The gate check differs from the value-network check: it verifies that
// adding the full gradient reproduces the previewed (withWeightDiff) output.
let gateNetwork = GateNetwork(inputSize: 3, outputSize: 2)
let expertOutput = Vector.fromArray([0.2, 0.8])
for col in (0..<inputMatrix.col) {
    let input = inputMatrix.colVector(col)
    let (gateOutput, weightGradient) = gateNetwork.getValueAndWeightGradient(input, expertOutput: expertOutput)
    let output = gateOutput +* expertOutput
    let gateOutputWithWeightGradient = gateNetwork.getValue(input, withWeightDiff: weightGradient, scale: 1.0)
    let outputWithWeightGradient = gateOutputWithWeightGradient +* expertOutput
    let diff = outputWithWeightGradient - output
    print("input: \(input.transpose()), output: \(output)")
    print(" with gradient: \(outputWithWeightGradient), diff: \(diff)")
    gateNetwork.addWeight(weightGradient)
    let newGateOutput = gateNetwork.getValue(input)
    let newOutput = newGateOutput +* expertOutput
    let newDiff = newOutput - output
    print(" new output: \(newOutput), diff: \(newDiff)")
}

print("----------")
print("ValueHME")
print("----------")
let valueHME = ValueHME.create(3, outputMin: -1.0, outputMax: 1.0, structure: [10, 10])
checkValueNetwork(valueHME, inputMatrix: inputMatrix)
実行例は、以下:
----------
ValueNN
----------
input: [1.0, 1.0, 1.0], output: -0.183942077296883
scale: 0.1875, iterations: 4
diff (scaled): 1.01724963088052, diff (0.1*scaled): 0.105130155911477
new output: -0.0788119213854061, diff: 0.105130155911477
input: [1.0, 1.0, 0.0], output: -0.0727086141640791
scale: 0.25, iterations: 2
diff (scaled): 1.09166909286381, diff (0.1*scaled): 0.129576278870883
new output: 0.0568676647068041, diff: 0.129576278870883
input: [1.0, 0.0, 1.0], output: -0.383684591887409
scale: 0.25, iterations: 2
diff (scaled): 1.07005618759768, diff (0.1*scaled): 0.115689382429312
new output: -0.267995209458097, diff: 0.115689382429312
input: [0.0, 1.0, 1.0], output: 0.501642422844154
scale: 1.25, iterations: 3
diff (scaled): 1.03458420400201, diff (0.1*scaled): 0.463696492593989
new output: 0.965338915438143, diff: 0.463696492593989
input: [1.0, 0.0, 0.0], output: 0.0147913651058598
scale: 0.5, iterations: 1
diff (scaled): 1.09849345190749, diff (0.1*scaled): 0.224957607158544
new output: 0.239748972264404, diff: 0.224957607158544
input: [0.0, 1.0, 0.0], output: 1.00349356627074
scale: 18.0, iterations: 8
diff (scaled): 1.04838464458158, diff (0.1*scaled): 0.0657863751854824
new output: 1.06927994145622, diff: 0.0657863751854824
input: [0.0, 0.0, 1.0], output: 1.00445024397176
scale: 18.0, iterations: 8
diff (scaled): 1.02882994407428, diff (0.1*scaled): 0.070282826622567
new output: 1.07473307059433, diff: 0.070282826622567
input: [0.0, 0.0, 0.0], output: 1.04718514605304
scale: 20.0, iterations: 7
diff (scaled): 0.965905785136957, diff (0.1*scaled): 0.0690312508752593
new output: 1.1162163969283, diff: 0.0690312508752593
----------
GateNetwork
----------
input: [1.0, 1.0, 1.0], output: 0.5
with gradient: 0.626569701575002, diff: 0.126569701575002
new output: 0.626569701575002, diff: 0.126569701575002
input: [1.0, 1.0, 0.0], output: 0.587393783735477
with gradient: 0.627638891939002, diff: 0.0402451082035251
new output: 0.627638891939002, diff: 0.0402451082035251
input: [1.0, 0.0, 1.0], output: 0.60807465845349
with gradient: 0.637937581551879, diff: 0.0298629230983883
new output: 0.637937581551879, diff: 0.0298629230983883
input: [0.0, 1.0, 1.0], output: 0.623373727874239
with gradient: 0.645955025290896, diff: 0.022581297416657
new output: 0.645955025290896, diff: 0.022581297416657
input: [1.0, 0.0, 0.0], output: 0.583841706173601
with gradient: 0.605472969776552, diff: 0.0216312636029506
new output: 0.605472969776552, diff: 0.0216312636029506
input: [0.0, 1.0, 0.0], output: 0.580303255512027
with gradient: 0.602887409981527, diff: 0.0225841544694998
new output: 0.602887409981527, diff: 0.0225841544694998
input: [0.0, 0.0, 1.0], output: 0.575481724278274
with gradient: 0.59937319511023, diff: 0.0238914708319556
new output: 0.59937319511023, diff: 0.0238914708319556
input: [0.0, 0.0, 0.0], output: 0.5
with gradient: 0.5, diff: 0.0
new output: 0.5, diff: 0.0
----------
ValueHME
----------
input: [1.0, 1.0, 1.0], output: 0.583369090800088
scale: 12.0, iterations: 5
diff (scaled): 0.955143642897197, diff (0.1*scaled): 0.55711728368868
new output: 1.14048637448877, diff: 0.55711728368868
input: [1.0, 1.0, 0.0], output: 1.17912976992144
scale: 10.0, iterations: 6
diff (scaled): 0.958244431157407, diff (0.1*scaled): 0.218163118461483
new output: 1.39729288838292, diff: 0.218163118461483
input: [1.0, 0.0, 1.0], output: 1.26053893688435
scale: 7.0, iterations: 5
diff (scaled): 1.00825100197074, diff (0.1*scaled): 0.100641423159278
new output: 1.36118036004363, diff: 0.100641423159278
input: [0.0, 1.0, 1.0], output: 1.25521721395406
scale: 7.0, iterations: 5
diff (scaled): 0.98711439450816, diff (0.1*scaled): 0.0774174523384112
new output: 1.33263466629247, diff: 0.0774174523384112
input: [1.0, 0.0, 0.0], output: 1.37228160440216
scale: 24.0, iterations: 6
diff (scaled): 0.926736515167874, diff (0.1*scaled): 0.151453564687681
new output: 1.52373516908985, diff: 0.151453564687681
input: [0.0, 1.0, 0.0], output: 1.42285096816873
scale: 10.0, iterations: 6
diff (scaled): 0.995109500116182, diff (0.1*scaled): 0.0862919554937307
new output: 1.50914292366246, diff: 0.0862919554937307
input: [0.0, 0.0, 1.0], output: 1.33805920315436
scale: 7.0, iterations: 5
diff (scaled): 1.0092130994799, diff (0.1*scaled): 0.0818008367414018
new output: 1.41986003989576, diff: 0.0818008367414018
input: [0.0, 0.0, 0.0], output: 1.33449511265373
scale: 16.0, iterations: 4
diff (scaled): 0.947467726275787, diff (0.1*scaled): 0.0741106840380314
new output: 1.40860579669177, diff: 0.0741106840380314
これでValueNetworkの実装はOK、と言いたいところなんだけど、このままだとデータの保存や復帰が出来ないので、ちょっと困る。
なので、データの保存・復帰が出来るように修正していきたい。
今日はここまで!