lstm.go
package main

import (
	"fmt"
	"math"
)
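
// LongShortTermMemory layers LSTM-style gating on top of the embedded
// RecurrentNeuralNetwork, which supplies the weights, recurrentWeights,
// biases, hiddenStates, and related bookkeeping referenced below.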
type LongShortTermMemory struct {
	RecurrentNeuralNetwork
	inputGate     [][]float64 // Input gate activations
	forgetGate    [][]float64 // Forget gate activations
	outputGate    [][]float64 // Output gate activations
	cellState     [][]float64 // Cell states
	cellStatePrev [][]float64 // Previous cell states
}

// NewLongShortTermMemory initializes an LSTM with recurrent connections.
func NewLongShortTermMemory(inputSize int, hiddenLayers []int, outputSize int, activationType string, learningRate float64, dropoutRate float64) *LongShortTermMemory {
	lstm := &LongShortTermMemory{
		RecurrentNeuralNetwork: *NewRecurrentNeuralNetwork(inputSize, hiddenLayers, outputSize, activationType, learningRate, dropoutRate),
	}
	// Initialize per-layer gate activations and cell states.
	for _, size := range hiddenLayers {
		lstm.inputGate = append(lstm.inputGate, make([]float64, size))
		lstm.forgetGate = append(lstm.forgetGate, make([]float64, size))
		lstm.outputGate = append(lstm.outputGate, make([]float64, size))
		lstm.cellState = append(lstm.cellState, make([]float64, size))
		lstm.cellStatePrev = append(lstm.cellStatePrev, make([]float64, size))
	}
	return lstm
}

// forward runs the LSTM over a sequence of inputs, one slice per time
// step, and returns the output computed at the last time step.
func (lstm *LongShortTermMemory) forward(inputs [][]float64) []float64 {
	timeSteps := len(inputs)
	var finalOutput []float64
	// Reset the stored layer inputs and outputs for this sequence.
	lstm.layerInputs = [][]float64{}
	lstm.layerOutputs = [][]float64{}
	// Iterate over each time step.
	for t := 0; t < timeSteps; t++ {
		layerInput := inputs[t] // Input at the current time step
		// Ensure the input size matches the expected input size.
		if len(layerInput) != lstm.inputSize {
			panic(fmt.Sprintf("Input size mismatch at time step %d: expected %d, got %d", t, lstm.inputSize, len(layerInput)))
		}
		// Iterate over each hidden layer; each layer's input and output
		// are stored further below.
		for i := 0; i < len(lstm.hiddenLayers); i++ {
			layerOutput := make([]float64, lstm.hiddenLayers[i])
			// Compute the input, forget, and output gates. Note that this
			// implementation reuses a single weight matrix and bias vector
			// for all three gates (and for the candidate cell input below);
			// a standard LSTM learns separate parameters per gate, so here
			// the three gate activations coincide.
			for j := 0; j < lstm.hiddenLayers[i]; j++ {
				// Input gate
				inputGateVal := lstm.biases[i][j]
				for k := 0; k < len(layerInput); k++ {
					inputGateVal += layerInput[k] * lstm.weights[i][k][j]
				}
				for k := 0; k < len(lstm.hiddenStates[i]); k++ {
					inputGateVal += lstm.hiddenStates[i][k] * lstm.recurrentWeights[i][k][j]
				}
				lstm.inputGate[i][j] = sigmoid(inputGateVal)
				// Forget gate
				forgetGateVal := lstm.biases[i][j]
				for k := 0; k < len(layerInput); k++ {
					forgetGateVal += layerInput[k] * lstm.weights[i][k][j]
				}
				for k := 0; k < len(lstm.hiddenStates[i]); k++ {
					forgetGateVal += lstm.hiddenStates[i][k] * lstm.recurrentWeights[i][k][j]
				}
				lstm.forgetGate[i][j] = sigmoid(forgetGateVal)
				// Output gate
				outputGateVal := lstm.biases[i][j]
				for k := 0; k < len(layerInput); k++ {
					outputGateVal += layerInput[k] * lstm.weights[i][k][j]
				}
				for k := 0; k < len(lstm.hiddenStates[i]); k++ {
					outputGateVal += lstm.hiddenStates[i][k] * lstm.recurrentWeights[i][k][j]
				}
				lstm.outputGate[i][j] = sigmoid(outputGateVal)
			}
			// Update the cell state, c_t = f_t*c_{t-1} + i_t*g_t, and the
			// layer output, h_t = o_t*tanh(c_t).
			for j := 0; j < lstm.hiddenLayers[i]; j++ {
				// Candidate cell input g_t, computed from the same shared
				// weights and recurrent connections as the gates above.
				cellInputVal := lstm.biases[i][j]
				for k := 0; k < len(layerInput); k++ {
					cellInputVal += layerInput[k] * lstm.weights[i][k][j]
				}
				for k := 0; k < len(lstm.hiddenStates[i]); k++ {
					cellInputVal += lstm.hiddenStates[i][k] * lstm.recurrentWeights[i][k][j]
				}
				lstm.cellState[i][j] = lstm.forgetGate[i][j]*lstm.cellStatePrev[i][j] + lstm.inputGate[i][j]*tanh(cellInputVal)
				layerOutput[j] = lstm.outputGate[i][j] * tanh(lstm.cellState[i][j])
			}
			// Carry the hidden and cell states forward to the next time
			// step; copy avoids aliasing cellStatePrev to the cellState slice.
			lstm.hiddenStates[i] = layerOutput
			copy(lstm.cellStatePrev[i], lstm.cellState[i])
			// Apply dropout
			layerOutput = lstm.applyDropout(layerOutput, lstm.dropoutRate)
			// Store this layer's input and output for the time step
			lstm.layerInputs = append(lstm.layerInputs, layerInput)
			lstm.layerOutputs = append(lstm.layerOutputs, layerOutput)
			layerInput = layerOutput
		}
		// Output layer: compute the network output at the current time step.
		finalOutput = make([]float64, lstm.outputSize)
		for i := 0; i < lstm.outputSize; i++ {
			neuron := lstm.biases[len(lstm.biases)-1][i]
			for j := 0; j < len(layerInput); j++ {
				neuron += layerInput[j] * lstm.weights[len(lstm.weights)-1][j][i]
			}
			finalOutput[i] = lstm.activate(neuron)
		}
		// Store the final output for this time step.
		lstm.layerInputs = append(lstm.layerInputs, layerInput)
		lstm.layerOutputs = append(lstm.layerOutputs, finalOutput)
	}
	// Return the output from the last time step.
	return finalOutput
}

// Sigmoid activation function.
func sigmoid(x float64) float64 {
	return 1.0 / (1.0 + math.Exp(-x))
}

// Tanh activation function (thin wrapper around math.Tanh).
func tanh(x float64) float64 {
	return math.Tanh(x)
}
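
// Example usage: a minimal sketch of driving this LSTM end to end. The
// hyperparameter values and the "tanh" activation name are illustrative
// assumptions; accepted values depend on NewRecurrentNeuralNetwork,
// which is defined elsewhere in this package.
func exampleLSTM() {
	// 3 input features, one hidden layer of 4 units, 2 outputs,
	// learning rate 0.01, no dropout.
	lstm := NewLongShortTermMemory(3, []int{4}, 2, "tanh", 0.01, 0.0)

	// A sequence of two time steps, each with 3 features.
	inputs := [][]float64{
		{0.5, -0.1, 0.3},
		{0.2, 0.7, -0.4},
	}

	output := lstm.forward(inputs)
	fmt.Println("output after the last time step:", output) // 2 values
}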