我在进行多类分类任务时使用了自定义指标。我使用的是从网上找到的代码。
自定义指标的类是这样的:
import numpy as np
import keras
from keras.callbacks import Callback
from sklearn.metrics import confusion_matrix, f1_score, precision_score, recall_score


class Metrics(keras.callbacks.Callback):
    """Callback that scores the model on ``self.validation_data`` after each epoch.

    At the end of every epoch it predicts on the validation inputs and appends
    a micro-averaged F1 score and a confusion matrix to lists kept on the
    instance.
    """

    def on_train_begin(self, logs={}):
        # Start every training run with empty per-epoch histories.
        self.confusion = []
        self.precision = []
        self.recall = []
        self.f1s = []

    def on_epoch_end(self, epoch, logs={}):
        # NOTE: this is the line the quoted traceback points at.
        # self.validation_data is None here, so indexing it raises
        # "TypeError: 'NoneType' object is not subscriptable".
        # `score` is computed but not used anywhere below.
        score = np.asarray(self.model.predict(self.validation_data[0]))
        # Round the predicted scores to the nearest integer before scoring.
        predict = np.round(np.asarray(self.model.predict(self.validation_data[0])))
        targ = self.validation_data[1]

        # NOTE(review): `sklm` is not defined by the imports above; they import
        # the scoring functions directly, with no `sklm` alias.
        self.f1s.append(sklm.f1_score(targ, predict,average='micro'))
        # The confusion matrix is built from class indices (argmax along axis 1).
        self.confusion.append(sklm.confusion_matrix(targ.argmax(axis=1),predict.argmax(axis=1)))

        # NOTE(review): these bare names are not defined in this scope; the
        # lists live on `self`.
        return confusion, precision, recall, f1s
在模型训练时使用Metrics类的对象:
# Train the model, passing the validation split explicitly and attaching a
# fresh Metrics callback instance.
history = model.fit(X_train, np.array(Y_train),
                    batch_size=32,
                    epochs=10,
                    validation_data=(X_test, np.array(Y_test)),
                    #validation_split=0.1,
                    verbose=2,
                    callbacks=[Metrics()])
我遇到了以下错误:
TypeError: 'NoneType' object is not subscriptable
错误跟踪:
Epoch 1/10
--------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-63-1a11cfdbd329> in <module>()
      6                     #validation_split=0.1,
      7                     verbose=2,
----> 8                     callbacks=[Metrics()])

3 frames
<ipython-input-62-8073719b4ec0> in on_epoch_end(self, epoch, logs)
     12
     13     def on_epoch_end(self, epoch, logs={}):
---> 14         score = np.asarray(self.model.predict(self.validation_data[0]))
     15         predict = np.round(np.asarray(self.model.predict(self.validation_data[0])))
     16         targ = self.validation_data[1]

TypeError: 'NoneType' object is not subscriptable
为什么尽管我在类方法中提供了返回参数,但它仍然是NoneType对象呢?
更新:
我认为问题可能出在我使用的数据集上,数据的结构可能会导致自定义指标出现错误。然而,有一个解决方案似乎对我的数据有效。
import keras.backend as K


def f1_metric(y_true, y_pred):
    """Return an F1 value computed with Keras backend ops.

    Both inputs are clipped to [0, 1] and rounded before being summed, so an
    entry of ``y_pred`` counts as a predicted positive only where it rounds
    to 1. ``K.epsilon()`` is added to every denominator.

    Args:
        y_true: ground-truth tensor supplied to the metric.
        y_pred: prediction tensor supplied to the metric.

    Returns:
        ``2 * precision * recall / (precision + recall + epsilon)``.
    """
    # Entries where the product of target and prediction rounds to 1.
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    # Entries whose target rounds to 1.
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    # Entries whose prediction rounds to 1.
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    recall = true_positives / (possible_positives + K.epsilon())
    f1_val = 2*(precision*recall)/(precision+recall+K.epsilon())
    return f1_val


# Register the function as an additional metric next to the built-in accuracy.
model.compile(...,metrics=['accuracy', f1_metric])
来源:https://datascience.stackexchange.com/questions/48246/how-to-compute-f1-in-tensorflow
回答:
回调对象上的 `validation_data` 属性已被弃用(请查看相关的弃用说明),因此 `self.validation_data` 的值为 `None`,对它取下标就会报错。下面是针对您的设置的一个解决方案。
class Metrics(tf.keras.callbacks.Callback):
    """Callback skeleton that receives the validation data through its constructor."""

    def __init__(self,val_x, val_y, batch_size = 20):
        super().__init__()
        self.val_x = val_x  # key change: keep the validation inputs on the instance
        self.val_y = val_y  # key change: keep the validation targets on the instance
        self.batch_size = batch_size

    def on_train_begin(self, logs=None):
        ...

    def on_epoch_end(self, epoch, logs=None):
        ...
基本上,您需要在自定义回调类中定义 `__init__`,通过它把验证数据传入并保存在实例上,而不是依赖 `self.validation_data`。
样本代码
下面是一个演示
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout

# Load CIFAR-10 as (inputs, targets) pairs for the training and validation splits.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()

# Inputs (training split, then validation split): cast to float32 and divide by 255.
x_train, x_test = (
    split.astype('float32') / 255 for split in (x_train, x_test)
)

# Targets (training split, then validation split): one-hot encode over 10 classes.
y_train, y_test = (
    tf.keras.utils.to_categorical(labels, num_classes=10)
    for labels in (y_train, y_test)
)

print(x_train.shape, y_train.shape)
print(x_test.shape, y_test.shape)
# (50000, 32, 32, 3) (50000, 10)
# (10000, 32, 32, 3) (10000, 10)
模型
# Functional-API model: one convolution block, global max pooling and a
# 10-way softmax classifier. The input tensor is named `inputs` so that it
# does not shadow Python's builtin `input`.
inputs = tf.keras.Input(shape=(32,32,3))

# Block 1
x = tf.keras.layers.Conv2D(32, 3, strides=2, activation="relu")(inputs)
x = tf.keras.layers.MaxPooling2D(3)(x)

# Now we apply global max pooling.
gap = tf.keras.layers.GlobalMaxPooling2D()(x)

# Finally, we add a classification layer.
output = tf.keras.layers.Dense(10, activation='softmax')(gap)

# Tie everything together
func_model = tf.keras.Model(inputs, output)
自定义回调
from sklearn.metrics import confusion_matrix, f1_score, precision_score, recall_score


class Metrics(tf.keras.callbacks.Callback):
    """Record validation F1, precision, recall and a confusion matrix per epoch.

    The validation data is handed to the constructor and kept on the instance,
    so the callback does not depend on ``self.validation_data``.

    Args:
        val_x: validation inputs passed to ``self.model.predict``.
        val_y: validation targets; ``argmax(axis=1)`` is taken on them, so
            they are expected to be 2-D (e.g. one-hot encoded).
        batch_size: batch size used when predicting on ``val_x``.

    After training, ``f1s``, ``precision``, ``recall`` and ``confusion`` each
    hold one entry per epoch.
    """

    def __init__(self, val_x, val_y, batch_size=20):
        super().__init__()
        self.val_x = val_x
        self.val_y = val_y
        self.batch_size = batch_size

    def on_train_begin(self, logs=None):
        # Start every training run with empty per-epoch histories.
        self.confusion = []
        self.precision = []
        self.recall = []
        self.f1s = []

    def on_epoch_end(self, epoch, logs=None):
        targ = self.val_y

        # A single forward pass over the validation inputs is enough; the
        # stored batch size is honoured instead of being silently ignored.
        scores = np.asarray(
            self.model.predict(self.val_x, batch_size=self.batch_size)
        )

        # NOTE(review): rounding turns a row into all zeros whenever no class
        # score exceeds 0.5, and argmax of an all-zero row is 0, so such
        # samples are counted as class 0 in the confusion matrix. Take argmax
        # of the raw scores instead if that is not intended.
        predict = np.round(scores)

        self.f1s.append(f1_score(targ, predict, average='micro'))
        self.precision.append(precision_score(targ, predict, average='micro'))
        self.recall.append(recall_score(targ, predict, average='micro'))
        self.confusion.append(
            confusion_matrix(targ.argmax(axis=1), predict.argmax(axis=1))
        )

        print("\n在第{}个epoch的f1_score为{}:".format(epoch, self.f1s[-1]))
        print('\n在第{}个epoch的混淆矩阵为{}'.format(epoch, self.confusion[-1]))
运行
# Compile
print('\n功能API')
func_model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss='categorical_crossentropy',
)

# The callback receives the validation split directly through its constructor.
metrics = Metrics(x_test, y_test)

# Fit
func_model.fit(
    x_train,
    y_train,
    batch_size=128,
    epochs=3,
    validation_data=(x_test, y_test),
    callbacks=[metrics],
)
日志
训练50000个样本,验证10000个样本Epoch 1/349920/50000 [============================>.] - ETA: 0s - loss: 1.7155在第0个epoch的f1_score为0.14721851357365376:在第0个epoch的混淆矩阵为[[919 11 2 0 0 2 3 6 46 11] [859 98 0 0 0 0 2 2 5 34] [942 7 9 0 1 6 27 6 2 0] [973 6 1 1 0 8 5 3 2 1] [949 1 0 0 22 1 22 3 1 1] [958 2 0 0 0 26 5 6 2 1] [831 2 3 0 0 1 158 1 3 1] [916 5 1 0 1 6 2 56 1 12] [750 11 0 0 0 0 1 4 227 7] [835 27 0 0 0 1 4 4 9 120]]50000/50000 [==============================] - 10s 206us/sample - loss: 1.7154 - val_loss: 1.7113Epoch 2/349664/50000 [============================>.] - ETA: 0s - loss: 1.7102在第1个epoch的f1_score为0.16048514677447706:在第1个epoch的混淆矩阵为[[896 10 2 0 0 2 2 5 69 14] [845 89 0 0 0 1 2 2 11 50] [941 6 12 0 2 7 23 5 4 0] [967 5 1 0 0 13 6 2 5 1] [946 0 1 0 27 1 20 2 2 1] [945 2 1 0 0 38 5 5 2 2] [840 2 4 0 0 1 148 1 3 1] [910 4 1 0 1 13 1 51 1 18] [694 6 0 0 0 0 0 3 290 7] [803 23 0 0 1 1 4 4 13 151]]50000/50000 [==============================] - 10s 198us/sample - loss: 1.7102 - val_loss: 1.7080Epoch 3/350000/50000 [==============================] - ETA: 0s - loss: 1.7059在第2个epoch的f1_score为0.16229953553588644:在第2个epoch的混淆矩阵为[[899 9 2 0 0 2 3 5 65 15] [861 72 0 0 0 0 2 1 11 53] [935 4 10 0 1 5 36 6 3 0] [972 4 1 2 0 6 8 1 4 2] [949 0 1 0 10 0 35 2 2 1] [963 2 1 0 0 19 6 5 2 2] [796 2 4 0 0 0 194 0 3 1] [917 3 1 0 0 3 3 53 1 19] [702 4 0 0 0 0 1 3 281 9] [798 20 0 0 0 1 5 4 13 159]]50000/50000 [==============================] - 10s 196us/sample - loss: 1.7059 - val_loss: 1.7067
更新
关于您对自定义指标的担忧,它运行良好
import tensorflow.keras.backend as K


def f1_metric(y_true, y_pred):
    """Return an F1 value built from Keras backend ops.

    Each tensor is clipped to [0, 1] and rounded before being summed, so an
    entry counts as positive only where it rounds to 1. ``K.epsilon()`` is
    added to every denominator.

    Args:
        y_true: ground-truth tensor supplied to the metric.
        y_pred: prediction tensor supplied to the metric.

    Returns:
        ``2 * precision * recall / (precision + recall + epsilon)``.
    """

    def _positive_count(tensor):
        # Clip to [0, 1], round, and sum the entries.
        return K.sum(K.round(K.clip(tensor, 0, 1)))

    hits = _positive_count(y_true * y_pred)
    actual = _positive_count(y_true)
    predicted = _positive_count(y_pred)

    precision = hits / (predicted + K.epsilon())
    recall = hits / (actual + K.epsilon())
    return 2*(precision*recall)/(precision+recall+K.epsilon())


# Compile
print('\n功能API')
func_model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss='categorical_crossentropy',
    metrics=['accuracy', f1_metric],
)
Epoch 1/3
50000/50000 [==============================] - ETA: 5s - loss: 2.2136 - accuracy: 0.1976 - f1_metric: 0.0000e+00 - val_loss: 2.1119 - val_accuracy: 0.2443 - val_f1_metric: 0.0000e+00
Epoch 2/3
50000/50000 [==============================] - ETA: 7s - loss: 2.0456 - accuracy: 0.2546 - f1_metric: 4.3617e-04 - val_loss: 1.9909 - val_accuracy: 0.2829 - val_f1_metric: 0.0022