I am trying to use an artificial neural network, working from code I found on GitHub: https://github.com/stephencwelch/Neural-Networks-Demystified/blob/master/Part%206%20Training.ipynb

However, even though the matrix dimensions should be fine, I keep getting a ValueError:

```
ValueError: operands could not be broadcast together with shapes (4,1) (51,1)
```

The value 51 is the length of my lists, and since I have 13 input variables, I gave the network 13 input nodes.

The code for my input variables is as follows:
```python
import numpy as np

listofbirthweights = [3430., 3815., 3405., 2190.]#, 2990., 3175., 3830., 3447., 3885., 2926., 2632., 4451., 3540., 2720., 4111., 2960., 3240., 3271., 3402., 2364., 3575., 3550., 3997., 3430., 3430., 3827., 2920., 3228., 2807., 4060., 3685.5, 2598., 3345.3, 3005.1000000000004, 3316.9500000000003, 4139.1, 3402L, 3600.4500000000003, 3373.65, 3742.2000000000003, 3061.8, 3572.1000000000004, 2721.6000000000004, 3827.25, 3175.2000000000003, 3515.4, 3997.3500000000004, 2721.6000000000004, 3883.9500000000003, 2523.15]
for weight in range(len(listofbirthweights)):
    listofbirthweights[weight] /= 5000
y = np.array([listofbirthweights]).T

listVariable1 = [0.14639468261405383, 0.97545213377969786, 0.84734727899207973, 1.4484739704326901, 0.16314232728757735, 0.66187166090795935, 1.4376256200580582, 0.29994037390938211, 0.48907780685504304, 0.53109657979711511, 1.2277807441383937, 0.27907030573330116, 1.3695910015926773, 0.14496631493245565, 0.27423132990109989, 0.93782145410422046, 0.14868468846354996, 0.072958383822129144, 1.0420064935722728, 0.54012040073377132, 0.45292701498298288, 0.095267045187387697, 0.50790418911918012, 3.2574574354167067, 0.10911269682418498, 0.16996335861428288, 0.45524586101182357, 0.351405722226031, 0.37124417299819079, 1.4203957243198315, 0.27119225632260868, 1.1113740564160903, 0.19354881514016675, 0.58281757593522832, 0.43372029148679786, 0.098744798454119737, 0.5704041737669534, 0.17066813348854515, 0.30884364780932816, 0.51588822273416624, 0.15695045296832874, 0.048520413305103163, 0.068344316276563011, 0.1315183678181788, 0.3389983553892445, 0.31601229484661936, 0.22713267470170798, 0.12214944241664846, 0.50534165886223625, 0.17544983816074147, 0.11438673225012383]
listVariable2 = [0.10404445113191939, 0.19279221856537584, 0.19863151442801369, 0.15613544736003321, 0.13685881186064319, 0.21790024928306184, 0.2285008959978875, 0.17132273541147319, 0.12186212530933309, 0.11586768312596153, 0.15240676731462721, 0.11983360728747816, 0.20837521980545767, 0.10042039897554125, 0.070516135601742524, 0.21948822954359659, 0.11369528056781744, 0.058745113994697097, 0.25876234910621898, 0.15172764031616054, 0.14198160916163663, 0.078556849795320821, 0.26494990754411307, 0.17906875381180659, 0.07011823589923101, 0.079517491131827003, 0.092000848513905095, 0.15454500379080002, 0.21840477158134278, 0.2294134024515975, 0.085012773523916105, 0.34415042365581427, 0.11357092336082605, 0.18686647426678205, 0.18782663709201797, 0.06178242631156642, 0.16262339377383228, 0.15544278558139304, 0.15065699239514796, 0.23495689198145908, 0.11411254444775469, 0.04023291805330205, 0.059241440093412798, 0.10989999051359937, 0.14767676917896719, 0.18974739389420864, 0.15690022007602542, 0.063365808700030474, 0.12572691445860784, 0.088567961354411431, 0.08900510969553857]
listVariable3 = [0.074863618029391385, 0.095010440494756557, 0.097469695528702424, 0.10502061876774141, 0.114762367888051, 0.09765101230482999, 0.14256688141118506, 0.11169916877052428, 0.061725684295155465, 0.078011539339108621, 0.12426630911150606, 0.081722637644897025, 0.10870057919577722, 0.071181341651149227, 0.033366556530970357, 0.13085602655233133, 0.087481468883883612, 0.048346165493361182, 0.15539661605282443, 0.093560800477644743, 0.069517040270462085, 0.065111072275230275, 0.15458903743493821, 0.1059482501424576, 0.0481638760877472, 0.048330906278235268, 0.052931466865967829, 0.074794265496116702, 0.14584687194117452, 0.11199879427613438, 0.041145862929713707, 0.21696854620842304, 0.074216829958392447, 0.13410960276192421, 0.12484917055142346, 0.043146743378963993, 0.077103864736989131, 0.14028779549247919, 0.0859517900381392, 0.13977865876300227, 0.084691654823737111, 0.033413517733277852, 0.051390066235776209, 0.091198762600111727, 0.07285423352434188, 0.11528025761246247, 0.10931511725529663, 0.037325051526288358, 0.074118578620423786, 0.05424879646640287, 0.068966863864605668]

# ... (remaining variable lists listVariable4 through listVariable13 omitted) ...

X = np.transpose([listVariable1, listVariable2, listVariable3, listVariable4,
                  listVariable5, listVariable6, listVariable7, listVariable8,
                  listVariable9, listVariable10, listVariable11, listVariable12,
                  listVariable13])
X = X/np.amax(X)


class Neural_Network(object):
    def __init__(self):
        # Define hyperparameters
        self.inputLayerSize = 13
        self.outputLayerSize = 1
        self.hiddenLayerSize = 16

        # Weights (parameters)
        self.W1 = np.random.randn(self.inputLayerSize, self.hiddenLayerSize)
        self.W2 = np.random.randn(self.hiddenLayerSize, self.outputLayerSize)

    def forward(self, X):
        # Propagate inputs through the network
        self.z2 = np.dot(X, self.W1)
        self.a2 = self.sigmoid(self.z2)
        self.z3 = np.dot(self.a2, self.W2)
        yHat = self.sigmoid(self.z3)
        return yHat

    def sigmoid(self, z):
        # Apply the sigmoid activation function to a scalar, vector, or matrix
        return 1/(1+np.exp(-z))

    def sigmoidPrime(self, z):
        # Gradient of the sigmoid
        return np.exp(-z)/((1+np.exp(-z))**2)

    def costFunction(self, X, y):
        # Compute the cost for a given X and y, using the weights already stored in the class
        self.yHat = self.forward(X)
        J = 0.5*sum((y-self.yHat)**2)
        return J

    def costFunctionPrime(self, X, y):
        # Compute the derivatives with respect to W1 and W2 for a given X and y,
        # using the weights already stored in the class
        self.yHat = self.forward(X)

        delta3 = np.multiply(-(y-self.yHat), self.sigmoidPrime(self.z3))
        dJdW2 = np.dot(self.a2.T, delta3)

        delta2 = np.dot(delta3, self.W2.T)*self.sigmoidPrime(self.z2)
        dJdW1 = np.dot(X.T, delta2)

        return dJdW1, dJdW2

    # Helper functions for interacting with other classes:
    def getParams(self):
        # Get W1 and W2 unrolled into a single vector:
        params = np.concatenate((self.W1.ravel(), self.W2.ravel()))
        return params

    def setParams(self, params):
        # Set W1 and W2 from a single parameter vector
        W1_start = 0
        W1_end = self.hiddenLayerSize * self.inputLayerSize
        self.W1 = np.reshape(params[W1_start:W1_end],
                             (self.inputLayerSize, self.hiddenLayerSize))
        W2_end = W1_end + self.hiddenLayerSize*self.outputLayerSize
        self.W2 = np.reshape(params[W1_end:W2_end],
                             (self.hiddenLayerSize, self.outputLayerSize))

    def computeGradients(self, X, y):
        dJdW1, dJdW2 = self.costFunctionPrime(X, y)
        return np.concatenate((dJdW1.ravel(), dJdW2.ravel()))


def computeNumericalGradient(N, X, y):
    paramsInitial = N.getParams()
    numgrad = np.zeros(paramsInitial.shape)
    perturb = np.zeros(paramsInitial.shape)
    e = 1e-4

    for p in range(len(paramsInitial)):
        # Set the perturbation vector
        perturb[p] = e
        N.setParams(paramsInitial + perturb)
        loss2 = N.costFunction(X, y)

        N.setParams(paramsInitial - perturb)
        loss1 = N.costFunction(X, y)

        # Compute the numerical gradient
        numgrad[p] = (loss2 - loss1) / (2*e)

        # Zero out the value we changed:
        perturb[p] = 0

    # Restore the parameters to their original values:
    N.setParams(paramsInitial)

    return numgrad


from scipy import optimize


class trainer(object):
    def __init__(self, N):
        # Make a local reference to the network:
        self.N = N

    def callbackF(self, params):
        self.N.setParams(params)
        self.J.append(self.N.costFunction(self.X, self.y))

    def costFunctionWrapper(self, params, X, y):
        self.N.setParams(params)
        cost = self.N.costFunction(X, y)
        grad = self.N.computeGradients(X, y)
        return cost, grad

    def train(self, X, y):
        # Make internal variables for the callback function:
        self.X = X
        self.y = y

        # Make an empty list to store the costs:
        self.J = []

        params0 = self.N.getParams()

        options = {'maxiter': 200, 'disp': True}
        _res = optimize.minimize(self.costFunctionWrapper, params0, jac=True,
                                 method='BFGS', args=(X, y), options=options,
                                 callback=self.callbackF)

        self.N.setParams(_res.x)
        self.optimizationResults = _res


NN = Neural_Network()
T = trainer(NN)
T.train(X, y)
NN.costFunctionPrime(X, y)
NN.forward(X)
```
The code runs fine if I instead use these variables:

```python
X = np.array(([3.,5.], [5.,1.], [10.,2.]), dtype=float)
X = X/np.amax(X, axis=0)
y = np.array(([75.], [82.], [93.]), dtype=float)
y = y/100
```

and change the input parameters as follows (as in the example on the site):

```python
self.inputLayerSize = 2
self.outputLayerSize = 1
self.hiddenLayerSize = 3
```
My guess is that Python does not like the way I have formatted the `X` and `y` variables when entering my data – perhaps it does not recognize them as numpy arrays?

For reference, the full error for my input is:
```
ValueError                                Traceback (most recent call last)
C:\Users\ENVY14-i7-SPECTRE\AppData\Local\Enthought\Canopy\App\appdata\canopy-1.4.1.1975.win-x86_64\lib\site-packages\IPython\utils\py3compat.pyc in execfile(fname, glob, loc)
    195             else:
    196                 filename = fname
--> 197             exec compile(scripttext, filename, 'exec') in glob, loc
    198     else:
    199         def execfile(fname, *where):

C:\Users\ENVY14-i7-SPECTRE\Documents\Year 4\AlexMSci\NeuralNetworkMachineLearning2.py in <module>()
    172 NN = Neural_Network()
    173 T = trainer(NN)
--> 174 T.train(X,y)
    175 NN.costFunctionPrime(X,y)
    176 #Xtest = np.array(([4.3,5.], [6.,2.], [12.,6.]), dtype=float)

C:\Users\ENVY14-i7-SPECTRE\Documents\Year 4\AlexMSci\NeuralNetworkMachineLearning2.py in train(self, X, y)
    160         options = {'maxiter': 200, 'disp' : True}
    161         _res = optimize.minimize(self.costFunctionWrapper, params0, jac=True, method='BFGS',
--> 162                                  args=(X, y), options=options, callback=self.callbackF)
    163
    164         self.N.setParams(_res.x)

C:\Users\ENVY14-i7-SPECTRE\AppData\Local\Enthought\Canopy\User\lib\site-packages\scipy\optimize\_minimize.pyc in minimize(fun, x0, args, method, jac, hess, hessp, bounds, constraints, tol, callback, options)
    439         return _minimize_cg(fun, x0, args, jac, callback, **options)
    440     elif meth == 'bfgs':
--> 441         return _minimize_bfgs(fun, x0, args, jac, callback, **options)
    442     elif meth == 'newton-cg':
    443         return _minimize_newtoncg(fun, x0, args, jac, hess, hessp, callback,

C:\Users\ENVY14-i7-SPECTRE\AppData\Local\Enthought\Canopy\User\lib\site-packages\scipy\optimize\optimize.pyc in _minimize_bfgs(fun, x0, args, jac, callback, gtol, norm, eps, maxiter, disp, return_all, **unknown_options)
    845     else:
    846         grad_calls, myfprime = wrap_function(fprime, args)
--> 847     gfk = myfprime(x0)
    848     k = 0
    849     N = len(x0)

C:\Users\ENVY14-i7-SPECTRE\AppData\Local\Enthought\Canopy\User\lib\site-packages\scipy\optimize\optimize.pyc in function_wrapper(*wrapper_args)
    287     def function_wrapper(*wrapper_args):
    288         ncalls[0] += 1
--> 289         return function(*(wrapper_args + args))
    290
    291     return ncalls, function_wrapper

C:\Users\ENVY14-i7-SPECTRE\AppData\Local\Enthought\Canopy\User\lib\site-packages\scipy\optimize\optimize.pyc in derivative(self, x, *args)
     69             return self.jac
     70         else:
---> 71             self(x, *args)
     72             return self.jac
     73

C:\Users\ENVY14-i7-SPECTRE\AppData\Local\Enthought\Canopy\User\lib\site-packages\scipy\optimize\optimize.pyc in __call__(self, x, *args)
     61     def __call__(self, x, *args):
     62         self.x = numpy.asarray(x).copy()
---> 63         fg = self.fun(x, *args)
     64         self.jac = fg[1]
     65         return fg[0]

C:\Users\ENVY14-i7-SPECTRE\Documents\Year 4\AlexMSci\NeuralNetworkMachineLearning2.py in costFunctionWrapper(self, params, X, y)
    143     def costFunctionWrapper(self, params, X, y):
    144         self.N.setParams(params)
--> 145         cost = self.N.costFunction(X, y)
    146         grad = self.N.computeGradients(X,y)
    147

C:\Users\ENVY14-i7-SPECTRE\Documents\Year 4\AlexMSci\NeuralNetworkMachineLearning2.py in costFunction(self, X, y)
     69         # Compute the cost for a given X and y, using the weights already stored in the class
     70         self.yHat = self.forward(X)
---> 71         J = 0.5*sum((y-self.yHat)**2)
     72         return J
     73

ValueError: operands could not be broadcast together with shapes (4,1) (51,1)
```
Answer:
In the working case, the two arrays are `(3,2)` and `(3,1)`. They are broadcastable – the first dimensions match, and the `1` in the second dimension can be expanded to `2`.
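A minimal, self-contained demonstration of that broadcasting rule, using only the shapes involved rather than the question's data:

```python
import numpy as np

# (3,2) and (3,1): the first dimensions match and the trailing 1
# stretches to 2, so the subtraction broadcasts cleanly.
a = np.ones((3, 2))
b = np.ones((3, 1))
print((a - b).shape)   # (3, 2)

# (4,1) and (51,1): 4 != 51 and neither is 1, so numpy raises
# the same ValueError reported in the question.
c = np.ones((4, 1))
d = np.ones((51, 1))
try:
    c - d
except ValueError as err:
    print(err)         # operands could not be broadcast together ...
```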
The error occurs in `T.train(X,y)`.

The rest of the error stack probably does not matter, since it is not your code. We can assume that if `X` and `y` had the right shapes, it would work.

I do not see the shapes of these two arrays in the error case, and I am not going to run your code to find that out for myself. :(
I think your initial `y` could have been created with:

```python
listofbirthweights = np.array([[3430., 3815., 3405., 2190.]]).T/5000.
```

which produces a (4,1) array of floats.
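For what it is worth, a quick check of that one-liner (assuming only numpy) confirms the shape and dtype:

```python
import numpy as np

y = np.array([[3430., 3815., 3405., 2190.]]).T / 5000.
print(y.shape)   # (4, 1)
print(y.dtype)   # float64
```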
Your `X` is

```
In [199]: X.shape
Out[199]: (51, 13)
```
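Tracing the shapes through `forward` makes it clear where the (51,1) in the error comes from. A minimal sketch, assuming the layer sizes from the question's `__init__` (13 inputs, 16 hidden units, 1 output):

```python
import numpy as np

X = np.random.rand(51, 13)      # same shape as the question's X
W1 = np.random.randn(13, 16)    # inputLayerSize x hiddenLayerSize
W2 = np.random.randn(16, 1)     # hiddenLayerSize x outputLayerSize

def sigmoid(z):
    return 1/(1 + np.exp(-z))

# Same sequence of operations as Neural_Network.forward:
yHat = sigmoid(np.dot(sigmoid(np.dot(X, W1)), W2))
print(yHat.shape)               # (51, 1)
```

So `self.yHat` is (51,1), and `costFunction` subtracts it from your (4,1) `y` – exactly the failing pair in the traceback.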
Without knowing `T.trainer`, and judging only from the error shapes `(4,1) (51,1)`, it appears to be using your `y` and the columns of your `X`. Why isn't `y` `(51,1)`? Why 4?
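Going only by the code pasted in the question, one likely culprit is the stray `#` immediately after `2190.` in the `listofbirthweights` line: it comments out the remaining 47 values, leaving a 4-element list and therefore a (4,1) `y`. A minimal sketch of the fix, assuming that `#` was unintentional:

```python
import numpy as np

# With the stray '#' removed, every birth weight stays in the list
# (abbreviated excerpt; substitute the full 51 values from the question):
listofbirthweights = [3430., 3815., 3405., 2190., 2990., 3175., 3830.]
listofbirthweights = [w / 5000. for w in listofbirthweights]
y = np.array([listofbirthweights]).T

print(y.shape)   # (7, 1) for this excerpt; (51, 1) with the full list
```

With `y` at (51,1) it lines up with the (51,1) `yHat` that `forward(X)` produces from your (51,13) `X`, so the subtraction in `costFunction` should broadcast cleanly. (One aside: the `3402L` inside the commented-out run is a Python 2 long literal; under Python 3 it would have to become `3402.`.)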