### Keras神经网络对每个手写数字都预测相同的数字

我刚开始学习机器学习,所以作为第一个项目,我尝试构建了一个基于MNIST数据集的手写数字识别神经网络。当我用数据集本身提供的测试图像进行测试时,它似乎工作得很好(这就是函数test_predict的用途)。现在我想更进一步,让网络识别我拍摄的一些实际手写数字。函数partial_img_rec处理包含多个数字的图像,并将被multiple_digits调用。我知道这里使用递归可能看起来有点奇怪,我相信有更有效的方法来做这件事,但这不是问题所在。为了测试partial_img_rec,我提供了一些单个数字的照片,这些照片存储在.\individual_test文件夹中,它们看起来像这样:
1_digit.jpg

问题是:我的神经网络对每一个测试图像的预测都是“5”。无论实际显示的数字是什么,概率总是大约22%。我完全理解为什么结果不如使用MNIST数据集的测试图像时那样好,但我绝对没想到会这样。你知道这是为什么吗?任何建议都欢迎。提前谢谢你。

这是我的代码(已编辑,现在可以工作):

"""Train a dense MNIST classifier and use it to read handwritten digits from photos.

The module builds and trains the network when ``mnist_network`` is
instantiated, then slides a square window across an image to recognise one
or several digits.
"""

# imports for tests
import os
import random

# numpy is necessary since keras uses numpy arrays
import numpy as np
# import keras and the MNIST dataset
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from keras.utils import np_utils
# imports for pictures
from PIL import Image
from PIL import ImageOps


class mnist_network():
    """Fully connected MNIST digit classifier plus helpers to test it.

    Attributes set by ``__init__``:
        model: the compiled and trained Keras ``Sequential`` model.
        train_img / train_res: flattened, normalised training images and
            their one-hot labels.
        test_img / test_res: flattened, normalised test images and their
            one-hot labels.
    """

    def __init__(self):
        """Load MNIST, build the model and train it."""
        # load data
        (X_train, y_train), (X_test, y_test) = mnist.load_data()

        # flatten 28*28 images to a 784 vector for each image
        num_pixels = X_train.shape[1] * X_train.shape[2]
        X_train = X_train.reshape((X_train.shape[0], num_pixels)).astype('float32')
        X_test = X_test.reshape((X_test.shape[0], num_pixels)).astype('float32')

        # normalize inputs from 0-255 to 0-1
        X_train = X_train / 255
        X_test = X_test / 255

        # one hot encode outputs
        y_train = np_utils.to_categorical(y_train)
        y_test = np_utils.to_categorical(y_test)
        num_classes = y_test.shape[1]

        # create model: one hidden layer as wide as the input, softmax output
        self.model = Sequential()
        self.model.add(Dense(num_pixels, input_dim=num_pixels,
                             kernel_initializer='normal', activation='relu'))
        self.model.add(Dense(num_classes, kernel_initializer='normal',
                             activation='softmax'))

        # compile model
        self.model.compile(loss='categorical_crossentropy', optimizer='adam',
                           metrics=['accuracy'])

        # train the model
        self.model.fit(X_train, y_train, validation_data=(X_test, y_test),
                       epochs=10, batch_size=200, verbose=2)

        self.train_img = X_train
        self.train_res = y_train
        self.test_img = X_test
        self.test_res = y_test

    def test_all(self):
        """Evaluate the model on the whole test set and print the error rate."""
        scores = self.model.evaluate(self.test_img, self.test_res, verbose=0)
        print("Baseline Error: %.2f%%" % (100 - scores[1] * 100))

    def predict_result(self, img, num_pixels=None, show=False):
        """Predict the digit shown in a flattened 28x28 picture.

        Args:
            img: numpy array of shape ``(784,)``.
            num_pixels: ignored; the value is always taken from ``img``.
                Kept so existing callers keep working.
            show: currently unused; kept so existing callers keep working.

        Returns:
            A tuple ``(digit, probabilities)`` where ``digit`` is the most
            likely class and ``probabilities`` is the list of per-class
            scores produced by the model.

        Raises:
            AssertionError: if ``img`` is not a ``(784,)`` numpy array.
        """
        assert isinstance(img, np.ndarray) and img.shape == (784,)

        num_pixels = img.shape[0]
        # Run the network once and derive both results from the same output;
        # only the first row is needed since the batch holds a single image.
        probabilities = self.model.predict(img.reshape(-1, num_pixels))[0]
        return (int(np.argmax(probabilities)), probabilities.tolist())

    def test_predict(self, amount_test=100):
        """Check the model against random samples of the MNIST test set.

        Args:
            amount_test: how many random test images to classify; must be an
                int no larger than the size of the test set.

        Raises:
            AssertionError: if ``amount_test`` is not an int or is too large.
        """
        total_available = len(self.test_img)
        assert isinstance(amount_test, int) and amount_test <= total_available

        cnt_right = 0
        cnt_wrong = 0
        for _ in range(amount_test):
            ind = random.randrange(0, total_available)
            # Labels are one-hot rows, so the position of the maximum is the
            # correct digit.
            correct_res = int(np.argmax(self.test_res[ind]))
            predicted_res = self.predict_result(self.test_img[ind])[0]
            if correct_res != predicted_res:
                cnt_wrong += 1
                print("Error in predict ! index = ", ind,
                      " predicted result = ", predicted_res,
                      " correct result = ", correct_res)
            else:
                cnt_right += 1

        # Guard the ratio so that testing zero samples does not divide by zero.
        success_rate = (cnt_right / amount_test) * 100 if amount_test > 0 else 0.0
        print("The machine predicted correctly ", cnt_right, " out of ",
              amount_test, " examples. That is a success rate of ",
              success_rate, "%.")

    def partial_img_rec(self, image, upper_left, lower_right, results=None):
        """Slide a window across ``image`` from left to right and collect digits.

        Each window is cropped, converted to grayscale, scaled to 28x28,
        inverted, thresholded and fed to the network. A prediction is kept
        only when the network is at least 50% sure.

        Args:
            image: PIL image to scan.
            upper_left: ``(x, y)`` of the first window's upper-left corner.
            lower_right: ``(x, y)`` of the first window's lower-right corner.
            results: optional list that recognised digits are appended to; a
                new list is created when omitted.

        Returns:
            The list of recognised digits, in left-to-right order.
        """
        # A fresh list per call: a mutable default would be shared between
        # unrelated calls and leak digits from one image into the next.
        if results is None:
            results = []

        width, height = image.size

        # Iterating (instead of recursing once per window) keeps wide images
        # from exhausting the recursion limit.
        while True:
            left_x, left_y = upper_left
            right_x, right_y = lower_right
            print("current test part: ", upper_left, lower_right)
            print("results: ", results)

            # condition to stop: we've reached the full width of the picture
            if right_x > width:
                return results

            # Grayscale conversion makes the 28*28 reshape below valid for
            # colour input as well.
            partial = image.crop((left_x, left_y, right_x, right_y)).convert("L")
            # rescale image to 28*28 dimension; LANCZOS is the filter that
            # the removed ANTIALIAS constant referred to
            partial = partial.resize((28, 28), Image.LANCZOS)
            # debugging aid: opens every examined window in the image viewer
            partial.show()

            # transform to vector: invert the grey values, scale to 0-1 and
            # zero out everything below 0.5
            partial = ImageOps.invert(partial)
            pixels = np.asarray(partial, "float32") / 255.
            pixels[pixels < 0.5] = 0.
            # flatten image to 28*28 = 784 vector
            pixels = pixels.reshape(pixels.shape[0] * pixels.shape[1])

            # is there a number in this part of the image?
            res, prop = self.predict_result(pixels)
            print("result: ", res, ". probabilities: ", prop)

            # only count this result if the network is >= 50% sure
            if prop[res] >= 0.5:
                results.append(res)
                # step is 80% of the window size (the original image's height)
                step = int(height * 0.8)
                print("found valid result")
            else:
                # if there is no number found we take smaller steps
                step = height // 20
            # Always advance by at least one pixel; a zero step (tiny images)
            # would otherwise never terminate.
            step = max(1, step)
            print("step: ", step)

            # move the window on by the step
            upper_left = (left_x + step, left_y)
            lower_right = (right_x + step, right_y)

    def test_individual_digits(self):
        """Run ``partial_img_rec`` on the single-digit photos in 'individual_test'.

        Files must be square .jpg/.png images named 'number_digit.jpg', i.e.
        the first character of the file name is the expected digit.

        Raises:
            AssertionError: if a file in the folder is neither .jpg nor .png.
        """
        cnt_right, cnt_wrong = 0, 0
        folder = os.path.join(".", "individual_test")

        for imageName in os.listdir(folder):
            # image file must be a jpg or png
            assert imageName.endswith((".jpg", ".png")), \
                "unsupported file in test folder: " + imageName
            correct_res = int(imageName[0])
            image = Image.open(os.path.join(folder, imageName)).convert("L")

            # only square images in this test
            if image.size[0] != image.size[1]:
                print(imageName, " has the wrong proportions: ", image.size,
                      ". It has to be a square.")
                continue

            found = self.partial_img_rec(image, (0, 0),
                                         (image.size[0], image.size[1]),
                                         results=[])
            if not found:
                # Nothing recognised counts as a failure for this image.
                print("No prediction possible for ", imageName)
                cnt_wrong += 1
                continue

            predicted_res = found[0]
            if predicted_res != correct_res:
                print("error in partial_img-rec! Predicted ", predicted_res,
                      ". The correct result would have been ", correct_res)
                cnt_wrong += 1
            else:
                cnt_right += 1
                print("correctly predicted ", imageName)

        total = cnt_right + cnt_wrong
        # Guard the ratio: the folder may be empty or hold only skipped images.
        success_rate = (cnt_right / total) * 100 if total else 0.0
        print(cnt_right, " out of ", total,
              " digits were correctly recognised. The success rate is therefore ",
              success_rate, " %.")

    def multiple_digits(self, img):
        """Read all digits in ``img`` and return them as one string.

        Args:
            img: PIL image without unnecessary whitespace surrounding the
                digits.

        Returns:
            The recognised digits concatenated from left to right.
        """
        width, height = img.size
        # start with the first quadratic part of the image
        res_list = self.partial_img_rec(img, (0, 0), (height, height))
        return "".join(str(elem) for elem in res_list)


network = mnist_network()
network.test_individual_digits()

编辑

收到的回答非常有帮助,现在网络可以正确预测一些图片,包括上面展示的那张。然而,总体成功率仍低于50%。你有什么提高识别性能的建议吗?

返回不佳结果的图像示例:

69


回答:

Related Posts

使用LSTM在Python中预测未来值

这段代码可以预测指定股票的当前日期之前的值,但不能预测…

如何在gensim的word2vec模型中查找双词组的相似性

我有一个word2vec模型,假设我使用的是googl…

dask_xgboost.predict 可以工作但无法显示 – 数据必须是一维的

我试图使用 XGBoost 创建模型。 看起来我成功地…

ML Tuning – Cross Validation in Spark

我在https://spark.apache.org/…

如何在React JS中使用fetch从REST API获取预测

我正在开发一个应用程序,其中Flask REST AP…

如何分析ML.NET中多类分类预测得分数组?

我在ML.NET中创建了一个多类分类项目。该项目可以对…

发表回复

您的邮箱地址不会被公开。 必填项已用 * 标注