いものやま。

雑多な知識の寄せ集め

強化学習用のニューラルネットワークをSwiftで書いてみた。(その3)

昨日は強化学習用のニューラルネットワークの計算を行列で表現した。

今日はそれを使って実際に実装していく。

なお、Swiftでの行列演算については、以下を参照:

ここで定義したMatrixクラス、Vectorクラスを使っていくことになる。

また、Rubyでの実装は以下を参照:
(行列計算を使ってないので、読むのがかなり大変だけど・・・)

ニューラルネットワークの仕様は、上記のRubyのものと同じにする。

ValueNetworkプロトコル

まず、強化学習の関数近似に使うのがニューラルネットワークでもHMEでもいいように、インタフェースとなるプロトコルを定義する:

//==============================
// ValueNetwork
//------------------------------
// ValueNetwork.swift
//==============================

import Foundation

/// Common interface of a value-function approximator used for reinforcement
/// learning, so that callers do not depend on the concrete model behind it.
///
/// Weights are exchanged only through the opaque `Weight` protocol; callers
/// never see their internal layout.
protocol ValueNetwork {
  /// Returns the approximated value for `input` using the current weights.
  func getValue(input: Vector) -> Double
  /// Returns the value for `input` together with a `Weight` that holds the
  /// gradient of that value with respect to the weights.
  func getValueAndWeightGradient(input: Vector) -> (Double, Weight)
  /// Returns the value for `input` evaluated with the current weights plus
  /// `weightDiff * scale`.
  func getValue(input: Vector, withWeightDiff weightDiff: Weight, scale: Double) -> Double
  /// Adds `weightDiff` to the current weights.
  func addWeight(weightDiff: Weight)
}

定義の中でWeightという型が使われてるけど、これは次に定義するプロトコルで、重みの内部構成を隠蔽するためのインタフェース。

Weightプロトコル

ということで、Weightプロトコルの定義:

//==============================
// ValueNetwork
//------------------------------
// Weight.swift
//==============================

import Foundation

/// Opaque set of weights (or weight gradients) of a `ValueNetwork`.
///
/// Only the operations needed by the learning code are exposed: scaling by a
/// scalar, addition and subtraction. Every operation returns a new `Weight`.
protocol Weight {
  /// Returns a new weight with every component multiplied by `scalar`.
  func scale(scalar: Double) -> Weight
  /// Returns the component-wise sum of this weight and `other`.
  func add(other: Weight) -> Weight
  /// Returns the component-wise difference of this weight and `other`.
  func subtract(other: Weight) -> Weight
}

/// `weight * scalar`: shorthand for `weight.scale(scalar)`.
func *(weight: Weight, factor: Double) -> Weight {
  return weight.scale(factor)
}

/// `scalar * weight`: same as `weight * scalar`, i.e. `weight.scale(scalar)`.
func *(factor: Double, weight: Weight) -> Weight {
  return weight.scale(factor)
}

/// `lhs + rhs`: shorthand for `lhs.add(rhs)`.
func +(lhs: Weight, rhs: Weight) -> Weight {
  return lhs.add(rhs)
}

/// `lhs - rhs`: shorthand for `lhs.subtract(rhs)`.
func -(lhs: Weight, rhs: Weight) -> Weight {
  return lhs.subtract(rhs)
}

重みに対して行いたいことは、足し算(と引き算)と掛け算なので、それらをインタフェースとして定義している。
また、記述が簡単になるように、演算子オーバーロードも行っている。

ValueNNクラス

そして、強化学習用のニューラルネットワークの実装。
ValueNNクラスとして実装した:

//==============================
// ValueNetwork
//------------------------------
// ValueNN.swift
//==============================

import Foundation

/// Neural network with one hidden layer and a single scalar output,
/// usable as a `ValueNetwork`.
///
/// - Hidden units use a piecewise-linear activation: slope
///   `activationNormalGradient` for non-negative weighted input and slope
///   `activationLesserGradient` for negative weighted input.
/// - The output unit is linear with slope `activationNormalGradient` between
///   `outputMin` and `outputMax` and continues with slope
///   `activationLesserGradient` outside of that range.
class ValueNN: ValueNetwork {
  /// Concrete `Weight` of `ValueNN`: weights and biases of both layers.
  ///
  /// Instances are immutable; every operation returns a new `NNWeight`.
  /// An `NNWeight` can only be combined with another `NNWeight`.
  class NNWeight: Weight {
    private let hiddenLayerWeight: Matrix
    private let hiddenLayerBias: Vector
    private let outputLayerWeight: Vector
    private let outputLayerBias: Double
    
    private init(hiddenLayerWeight: Matrix,
                 hiddenLayerBias: Vector,
                 outputLayerWeight: Vector,
                 outputLayerBias: Double) {
      self.hiddenLayerWeight = hiddenLayerWeight
      self.hiddenLayerBias = hiddenLayerBias
      self.outputLayerWeight = outputLayerWeight
      self.outputLayerBias = outputLayerBias
    }
    
    /// Downcasts a generic `Weight` to `NNWeight`.
    ///
    /// Passing a weight that belongs to a different `ValueNetwork`
    /// implementation is a programming error, so it stops the program with an
    /// explicit message instead of an anonymous forced-cast failure.
    private static func fromWeight(weight: Weight) -> NNWeight {
      guard let nnWeight = weight as? NNWeight else {
        preconditionFailure("ValueNN.NNWeight can only be combined with another ValueNN.NNWeight")
      }
      return nnWeight
    }
    
    /// Returns a new weight with every component multiplied by `scalar`.
    func scale(scalar: Double) -> Weight {
      let hiddenLayerWeight = self.hiddenLayerWeight * scalar
      let hiddenLayerBias = self.hiddenLayerBias * scalar
      let outputLayerWeight = self.outputLayerWeight * scalar
      let outputLayerBias = self.outputLayerBias * scalar
      return NNWeight(hiddenLayerWeight: hiddenLayerWeight,
                      hiddenLayerBias: hiddenLayerBias,
                      outputLayerWeight: outputLayerWeight,
                      outputLayerBias: outputLayerBias)
    }
    
    /// Returns the component-wise sum of this weight and `other`.
    /// `other` must be an `NNWeight`.
    func add(other: Weight) -> Weight {
      let otherNNWeight = NNWeight.fromWeight(other)
      let hiddenLayerWeight = self.hiddenLayerWeight + otherNNWeight.hiddenLayerWeight
      let hiddenLayerBias = self.hiddenLayerBias + otherNNWeight.hiddenLayerBias
      let outputLayerWeight = self.outputLayerWeight + otherNNWeight.outputLayerWeight
      let outputLayerBias = self.outputLayerBias + otherNNWeight.outputLayerBias
      return NNWeight(hiddenLayerWeight: hiddenLayerWeight,
                      hiddenLayerBias: hiddenLayerBias,
                      outputLayerWeight: outputLayerWeight,
                      outputLayerBias: outputLayerBias)
    }
    
    /// Returns the component-wise difference of this weight and `other`.
    /// `other` must be an `NNWeight`.
    func subtract(other: Weight) -> Weight {
      let otherNNWeight = NNWeight.fromWeight(other)
      let hiddenLayerWeight = self.hiddenLayerWeight - otherNNWeight.hiddenLayerWeight
      let hiddenLayerBias = self.hiddenLayerBias - otherNNWeight.hiddenLayerBias
      let outputLayerWeight = self.outputLayerWeight - otherNNWeight.outputLayerWeight
      let outputLayerBias = self.outputLayerBias - otherNNWeight.outputLayerBias
      return NNWeight(hiddenLayerWeight: hiddenLayerWeight,
                      hiddenLayerBias: hiddenLayerBias,
                      outputLayerWeight: outputLayerWeight,
                      outputLayerBias: outputLayerBias)
    }
  }
  
  /// Slope of the activation functions in their "normal" region.
  private static let activationNormalGradient: Double = 1.0
  /// Reduced slope of the activation functions outside the "normal" region.
  private static let activationLesserGradient: Double = 0.1
  
  private var weight: NNWeight
  let outputMin: Double
  let outputMax: Double
  
  /// Creates a network with randomly initialized weights.
  ///
  /// Hidden-layer weights and biases are drawn from `NormalDistRandom` with
  /// expected value 0 and variance `1 / (inputSize + 1)`; output-layer weights
  /// and bias use variance `1 / (hiddenUnitSize + 1)`.
  ///
  /// - Parameter inputSize: Number of components of an input vector.
  /// - Parameter hiddenUnitSize: Number of hidden units.
  /// - Parameter outputMin: Lower end of the output unit's normal-slope range.
  /// - Parameter outputMax: Upper end of the output unit's normal-slope range.
  init(inputSize: Int, hiddenUnitSize: Int, outputMin: Double, outputMax: Double) {
    self.outputMin = outputMin
    self.outputMax = outputMax
    
    let hiddenLayerWeightVariance = 1.0 / (Double(inputSize) + 1.0)
    let hiddenLayerWeightGenerator = NormalDistRandom(expected: 0.0, variance: hiddenLayerWeightVariance)
    
    // One array of `inputSize` weights per hidden unit.
    let hiddenLayerWeightBuffer = (0..<hiddenUnitSize).map { _ in
      return (0..<inputSize).map { _ in
        return hiddenLayerWeightGenerator.getRandom()
      }
    }
    let hiddenLayerWeight = Matrix.fromArray(hiddenLayerWeightBuffer)
    
    let hiddenLayerBiasBuffer = (0..<hiddenUnitSize).map { _ in
      return hiddenLayerWeightGenerator.getRandom()
    }
    let hiddenLayerBias = Vector.fromArray(hiddenLayerBiasBuffer)
    
    let outputLayerWeightVariance = 1.0 / (Double(hiddenUnitSize) + 1.0)
    let outputLayerWeightGenerator = NormalDistRandom(expected: 0.0, variance: outputLayerWeightVariance)
    
    let outputLayerWeightBuffer = (0..<hiddenUnitSize).map { _ in
      return outputLayerWeightGenerator.getRandom()
    }
    let outputLayerWeight = Vector.fromArray(outputLayerWeightBuffer)
    
    let outputLayerBias = outputLayerWeightGenerator.getRandom()
    
    self.weight = NNWeight(hiddenLayerWeight: hiddenLayerWeight,
                           hiddenLayerBias: hiddenLayerBias,
                           outputLayerWeight: outputLayerWeight,
                           outputLayerBias: outputLayerBias)
  }
  
  /// Returns the network output for `input` using the current weights.
  func getValue(input: Vector) -> Double {
    return self.valueForInput(input, usingWeight: self.weight)
  }
  
  /// Returns the network output for `input` and the gradient of that output
  /// with respect to every weight and bias, packed into an `NNWeight`.
  func getValueAndWeightGradient(input: Vector) -> (Double, Weight) {
    // Forward pass; the intermediate values are kept for the backward pass.
    let hiddenLayerWeightedInput = ((self.weight.hiddenLayerWeight * input) as! Vector) + self.weight.hiddenLayerBias
    let hiddenLayerOutput = hiddenLayerWeightedInput.map {
      (weightedInput: Double) in
      return ValueNN.hiddenLayerOutputForWeightedInput(weightedInput)
    }
    let hiddenLayerGradient = hiddenLayerWeightedInput.map {
      (weightedInput: Double) in
      return ValueNN.hiddenLayerGradientForWeightedInput(weightedInput)
    }
    
    let outputLayerWeightedInput = self.weight.outputLayerWeight +* hiddenLayerOutput + self.weight.outputLayerBias
    let outputLayerOutput = self.outputLayerOutputForWeightedInput(outputLayerWeightedInput)
    let outputLayerGradient = self.outputLayerGradientForWeightedInput(outputLayerWeightedInput)
    
    // Backward pass. The quantity differentiated is the output itself (not an
    // error term), so the output delta is just the activation slope.
    // NOTE(review): `<*>` and `*+` are custom operators defined with
    // Matrix/Vector outside this file; presumably element-wise product and
    // outer product - confirm against their definitions.
    let outputLayerDelta = outputLayerGradient
    let hiddenLayerDelta = outputLayerDelta * self.weight.outputLayerWeight <*> hiddenLayerGradient
    
    let hiddenLayerWeightGradient = hiddenLayerDelta *+ input
    let hiddenLayerBiasGradient = hiddenLayerDelta
    let outputLayerWeightGradient = outputLayerDelta * hiddenLayerOutput
    let outputLayerBiasGradient = outputLayerDelta
    let weightGradient = NNWeight(hiddenLayerWeight: hiddenLayerWeightGradient,
                                  hiddenLayerBias: hiddenLayerBiasGradient,
                                  outputLayerWeight: outputLayerWeightGradient,
                                  outputLayerBias: outputLayerBiasGradient)
    
    return (outputLayerOutput, weightGradient)
  }
  
  /// Returns the network output for `input` evaluated with the weights
  /// `current + weightDiff * scale`. The stored weights are not modified.
  ///
  /// `weightDiff` must be an `NNWeight`.
  func getValue(input: Vector, withWeightDiff weightDiff: Weight, scale: Double) -> Double {
    let newWeight = NNWeight.fromWeight(self.weight + weightDiff * scale)
    return self.valueForInput(input, usingWeight: newWeight)
  }
  
  /// Replaces the stored weights with `current + weightDiff`.
  ///
  /// `weightDiff` must be an `NNWeight`.
  func addWeight(weightDiff: Weight) {
    self.weight = NNWeight.fromWeight(self.weight + weightDiff)
  }
  
  /// Runs the forward pass for `input` with the given weights and returns the
  /// output of the output unit. Shared by both `getValue` variants.
  private func valueForInput(input: Vector, usingWeight nnWeight: NNWeight) -> Double {
    // The matrix-vector product is downcast to `Vector` before the bias is added.
    let hiddenLayerWeightedInput = ((nnWeight.hiddenLayerWeight * input) as! Vector) + nnWeight.hiddenLayerBias
    let hiddenLayerOutput = hiddenLayerWeightedInput.map {
      (weightedInput: Double) in
      return ValueNN.hiddenLayerOutputForWeightedInput(weightedInput)
    }
    
    let outputLayerWeightedInput = nnWeight.outputLayerWeight +* hiddenLayerOutput + nnWeight.outputLayerBias
    return self.outputLayerOutputForWeightedInput(outputLayerWeightedInput)
  }
  
  /// Hidden-unit activation: slope `activationNormalGradient` for
  /// non-negative input, slope `activationLesserGradient` for negative input.
  private static func hiddenLayerOutputForWeightedInput(weightedInput: Double) -> Double {
    if weightedInput >= 0.0 {
      return ValueNN.activationNormalGradient * weightedInput
    } else {
      return ValueNN.activationLesserGradient * weightedInput
    }
  }
  
  /// Derivative of the hidden-unit activation at `weightedInput`.
  private static func hiddenLayerGradientForWeightedInput(weightedInput: Double) -> Double {
    if weightedInput >= 0.0 {
      return ValueNN.activationNormalGradient
    } else {
      return ValueNN.activationLesserGradient
    }
  }
  
  /// Output-unit activation: slope `activationNormalGradient` inside
  /// `outputMin ..< outputMax`, slope `activationLesserGradient` outside.
  /// The offsets make the function continuous at both boundaries.
  private func outputLayerOutputForWeightedInput(weightedInput: Double) -> Double {
    if weightedInput < self.outputMin {
      return (ValueNN.activationLesserGradient * weightedInput
                + (ValueNN.activationNormalGradient - ValueNN.activationLesserGradient) * self.outputMin)
    } else if weightedInput < self.outputMax {
      return ValueNN.activationNormalGradient * weightedInput
    } else {
      return (ValueNN.activationLesserGradient * weightedInput
                + (ValueNN.activationNormalGradient - ValueNN.activationLesserGradient) * self.outputMax)
    }
  }
  
  /// Derivative of the output-unit activation at `weightedInput`.
  ///
  /// At exactly `outputMax` this returns the normal slope, whereas
  /// `outputLayerOutputForWeightedInput` already uses the lesser-slope branch
  /// there. The function is continuous at that point, so both branches give
  /// the same output value.
  private func outputLayerGradientForWeightedInput(weightedInput: Double) -> Double {
    if (weightedInput < self.outputMin) || (self.outputMax < weightedInput) {
      return ValueNN.activationLesserGradient
    } else {
      return ValueNN.activationNormalGradient
    }
  }
}

細かくは説明しないけど、強化学習用のニューラルネットワークをSwiftで書いてみた。(その2) - いものやま。に書いた行列計算をそのまま実装している。
やっている計算はRubyのものと一緒なんだけど、Matrixクラス、Vectorクラスを定義してあるので、(比較的)スッキリした実装になっている。

動作確認

動作確認として、Rubyのときと同様に、以下のコードを書いた:

//==============================
// ValueNetwork
//------------------------------
// main.swift
//
// Test code for ValueNetwork
//==============================

import Foundation

// ValueNN

// Network under test: 3 inputs, 10 hidden units, outputMin -1.0, outputMax 1.0.
let valueNN = ValueNN(inputSize: 3, hiddenUnitSize: 10, outputMin: -1.0, outputMax: 1.0)

// Each inner array is one input pattern; after transposing, the patterns are
// read back one column at a time.
let inputMatrix = Matrix.fromArray([[1.0, 1.0, 1.0],
                                    [1.0, 1.0, 0.0],
                                    [1.0, 0.0, 1.0],
                                    [0.0, 1.0, 1.0],
                                    [1.0, 0.0, 0.0],
                                    [0.0, 1.0, 0.0],
                                    [0.0, 0.0, 1.0]]).transpose()

for col in (0..<inputMatrix.col) {
  let input = inputMatrix.colVector(col)
  let (output, gradient) = valueNN.getValueAndWeightGradient(input)
  
  // Search for a scale such that moving the weights by `scale * gradient`
  // changes the output by a value in 0.9 ... 1.1.
  // The scale is halved/doubled until both bounds are known, then bisected.
  var scale = 1.0
  var diff = valueNN.getValue(input, withWeightDiff: gradient, scale: scale) - output
  var upper: Double? = nil
  var lower: Double? = nil
  var last = 100  // stays 100 when the search does not finish within 100 steps
  
  for i in (0..<100) {
    let mustShrink = (diff < 0.0) || (1.1 < diff)
    let mustGrow = !mustShrink && (diff < 0.9)
    
    if !(mustShrink || mustGrow) {
      // diff is inside the target band (or not comparable): stop searching.
      last = i
      break
    }
    
    if mustShrink {
      upper = scale
      if let knownLower = lower {
        scale = (scale + knownLower) / 2.0
      } else {
        scale = scale / 2.0
      }
    } else {
      lower = scale
      if let knownUpper = upper {
        scale = (knownUpper + scale) / 2.0
      } else {
        scale = scale * 2.0
      }
    }
    
    diff = valueNN.getValue(input, withWeightDiff: gradient, scale: scale) - output
  }
  
  // Output change for a tenth of the scale that was found.
  let outputWith01Scaled = valueNN.getValue(input, withWeightDiff: gradient, scale: 0.1 * scale)
  let diffWith01Scaled = outputWith01Scaled - output
  
  print("input: \(input.transpose()), output: \(output)")
  print("  scale: \(scale), iterations: \(last)")
  print("  diff (scaled): \(diff), diff (0.1*scaled): \(diffWith01Scaled)")
  
  // Actually apply that small step and check the resulting output.
  let step = gradient * 0.1 * scale
  valueNN.addWeight(step)
  let newOutput = valueNN.getValue(input)
  let newDiff = newOutput - output
  print("  new output: \(newOutput), diff: \(newDiff)")
}

// 以下略

実行例は、以下:

----------
ValueNN
----------
input: [1.0, 1.0, 1.0], output: 0.131021133192784
  scale: 0.125, iterations: 3
  diff (scaled): 0.939252246852243, diff (0.1*scaled): 0.155481391989248
  new output: 0.286502525182032, diff: 0.155481391989248
input: [1.0, 1.0, 0.0], output: 0.363438002291251
  scale: 0.5, iterations: 1
  diff (scaled): 0.92473632645851, diff (0.1*scaled): 0.331184714810343
  new output: 0.694622717101594, diff: 0.331184714810343
input: [1.0, 0.0, 1.0], output: 0.0041779249095818
  scale: 0.125, iterations: 3
  diff (scaled): 1.03338785763323, diff (0.1*scaled): 0.13662619759527
  new output: 0.140804122504852, diff: 0.13662619759527
input: [0.0, 1.0, 1.0], output: 0.257684625662284
  scale: 0.5, iterations: 1
  diff (scaled): 0.992301758621281, diff (0.1*scaled): 0.335977958448971
  new output: 0.593662584111254, diff: 0.335977958448971
input: [1.0, 0.0, 0.0], output: 0.184531558718531
  scale: 0.5, iterations: 1
  diff (scaled): 1.01382568012975, diff (0.1*scaled): 0.27650775017151
  new output: 0.461039308890041, diff: 0.27650775017151
input: [0.0, 1.0, 0.0], output: 0.166668276265283
  scale: 0.5, iterations: 1
  diff (scaled): 0.974321711025463, diff (0.1*scaled): 0.217514514547388
  new output: 0.384182790812671, diff: 0.217514514547388
input: [0.0, 0.0, 1.0], output: 0.180236823017496
  scale: 0.5, iterations: 1
  diff (scaled): 0.945552060789391, diff (0.1*scaled): 0.200679893784258
  new output: 0.380916716801753, diff: 0.200679893784258
# 以下略

今日はここまで!