我有以下代码,当我运行theano_build()方法时,它抛出了一个错误,显示
File "rnn_theano.py", line 28, in __init__self.__theano_build__() File "rnn_theano.py", line 45, in __theano_build__non_sequences=[U, V, W1, W12, W2], File "/usr/local/lib/python2.7/dist-packages/theano/scan_module/scan.py", line 745, in scancondition, outputs, updates = scan_utils.get_updates_and_outputs(fn(*args))TypeError: forward_prop_step() takes exactly 8 arguments (7 given)
以下是Theano中的代码。这基本上是一个两层隐藏层的递归神经网络
import operator

import numpy as np
import theano as theano
import theano.tensor as T

from utils import *


class RNNTheano:
    """A recurrent neural network with two stacked hidden layers, built on Theano.

    Per time step t:
        s1_t = tanh(U[:, x_t] + W1 . s1_{t-1})
        s2_t = tanh(W12 . s1_t + W2 . s2_{t-1})
        o_t  = softmax(V . s2_t)
    """

    def __init__(self, word_dim, hidden_dim=100, bptt_truncate=4):
        """Build the parameter tensors and compile the Theano functions.

        word_dim      -- vocabulary size (one-hot input/output dimension)
        hidden_dim    -- size of each of the two hidden layers
        bptt_truncate -- truncation depth for backprop through time
        """
        # Assign instance variables
        self.word_dim = word_dim
        self.hidden_dim = hidden_dim
        self.bptt_truncate = bptt_truncate
        # Randomly initialize the network parameters in the
        # +/- 1/sqrt(fan_in) range.
        U = np.random.uniform(-np.sqrt(1./word_dim), np.sqrt(1./word_dim), (hidden_dim, word_dim))
        V = np.random.uniform(-np.sqrt(1./hidden_dim), np.sqrt(1./hidden_dim), (word_dim, hidden_dim))
        W1 = np.random.uniform(-np.sqrt(1./hidden_dim), np.sqrt(1./hidden_dim), (hidden_dim, hidden_dim))
        W12 = np.random.uniform(-np.sqrt(1./hidden_dim), np.sqrt(1./hidden_dim), (hidden_dim, hidden_dim))
        W2 = np.random.uniform(-np.sqrt(1./hidden_dim), np.sqrt(1./hidden_dim), (hidden_dim, hidden_dim))
        # Theano: create shared variables so the SGD updates can mutate them
        self.U = theano.shared(name='U', value=U.astype(theano.config.floatX))
        self.V = theano.shared(name='V', value=V.astype(theano.config.floatX))
        self.W1 = theano.shared(name='W1', value=W1.astype(theano.config.floatX))
        self.W12 = theano.shared(name='W12', value=W12.astype(theano.config.floatX))
        self.W2 = theano.shared(name='W2', value=W2.astype(theano.config.floatX))
        # We store the compiled Theano graph pieces here
        self.theano = {}
        self.__theano_build__()

    def forward_prop_step(self, x_t, s_t1_prev, s_t2_prev, U, V, W1, W12, W2):
        """One time step of the forward pass; scanned over the input sequence.

        x_t is a word index; s_t1_prev / s_t2_prev are the previous hidden
        states of the two layers; the remaining arguments are the
        non-sequence parameters passed through by theano.scan.
        """
        # U[:, x_t] selects the embedding column for word x_t
        s_t1 = T.tanh(U[:, x_t] + W1.dot(s_t1_prev))
        s_t2 = T.tanh(W12.dot(s_t1) + W2.dot(s_t2_prev))
        o_t = T.nnet.softmax(V.dot(s_t2))
        # FIX: return a tuple rather than a list so theano.scan maps the three
        # results one-to-one onto the three outputs_info slots instead of
        # trying to coerce the list into a single output.
        return o_t[0], s_t1, s_t2

    def __theano_build__(self):
        """Compile forward propagation, prediction, loss, BPTT and SGD functions."""
        U, V, W1, W12, W2 = self.U, self.V, self.W1, self.W12, self.W2
        x = T.ivector('x')
        y = T.ivector('y')
        # outputs_info: None for o (not fed back), zero vectors for both
        # hidden-state recurrences.
        [o, s1, s2], updates = theano.scan(
            self.forward_prop_step,
            sequences=x,
            outputs_info=[None,
                          dict(initial=T.zeros(self.hidden_dim)),
                          dict(initial=T.zeros(self.hidden_dim))],
            non_sequences=[U, V, W1, W12, W2],
            truncate_gradient=self.bptt_truncate,
            strict=False)

        prediction = T.argmax(o, axis=1)
        o_error = T.sum(T.nnet.categorical_crossentropy(o, y))

        # Gradients
        dU = T.grad(o_error, U)
        dV = T.grad(o_error, V)
        dW1 = T.grad(o_error, W1)
        dW12 = T.grad(o_error, W12)
        dW2 = T.grad(o_error, W2)

        # Assign functions
        self.forward_propagation = theano.function([x], o)
        self.predict = theano.function([x], prediction)
        self.ce_error = theano.function([x, y], o_error)
        self.bptt = theano.function([x, y], [dU, dV, dW1, dW12, dW2])

        # SGD
        learning_rate = T.scalar('learning_rate')
        # FIX: the original was missing the comma after the W1 update tuple,
        # which made Python try to *call* the tuple:
        #     (self.W1, ...) (self.W12, ...)  ->  TypeError
        self.sgd_step = theano.function(
            [x, y, learning_rate], [],
            updates=[(self.U, self.U - learning_rate * dU),
                     (self.V, self.V - learning_rate * dV),
                     (self.W1, self.W1 - learning_rate * dW1),
                     (self.W12, self.W12 - learning_rate * dW12),
                     (self.W2, self.W2 - learning_rate * dW2)])

    def calculate_total_loss(self, X, Y):
        """Sum of cross-entropy losses over all (x, y) sequence pairs."""
        return np.sum([self.ce_error(x, y) for x, y in zip(X, Y)])

    def calculate_loss(self, X, Y):
        """Average the total loss over the number of target words."""
        num_words = np.sum([len(y) for y in Y])
        return self.calculate_total_loss(X, Y) / float(num_words)
回答:
尝试更改
return [o_t[0], s_t1, s_t2]
为
return o_t[0], s_t1, s_t2
我认为前者会使Theano把返回的列表当作单一输出来强制转换为一个张量,而后者则明确返回与outputs_info中三个条目一一对应的三个对象。