This is my training script. I basically followed this tutorial https://www.youtube.com/watch?v=1lwddP0KUEg, except that I had to use the stanza package to handle the Spanish version:
import random
import json
import pickle
import numpy as np
import pandas as pd
import nltk
from nltk.tokenize.toktok import ToktokTokenizer
import stanza
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout
from tensorflow.keras.optimizers import SGD

toktok = ToktokTokenizer()
intents = json.loads(open("intents.json", "rb").read())

words = []
classes = []
documents = []
ignore_letters = ['¿', '?', '!', '.', ',']
nlp = stanza.Pipeline('es')

for intent in intents['intents']:
    for pattern in intent['patterns']:
        word_list = toktok.tokenize(pattern)
        words.extend(word_list)
        documents.append((word_list, intent['tag']))
        if intent['tag'] not in classes:
            classes.append(intent['tag'])

words = [nlp(word).to_dict() for word in words if word not in ignore_letters]

palabras = []
for i in range(0, len(words)):
    texto = words[i][0][0]["lemma"]
    palabras.append(texto)

palabras = sorted(set(palabras))
classes = sorted(set(classes))

pickle.dump(palabras, open('palabras.pkl', 'wb'))
pickle.dump(classes, open('classes.pkl', 'wb'))

training = []
output_empty = [0] * len(classes)

for document in documents:
    bag = []
    word_patterns = document[0]
    word_patterns = [nlp(word.lower()).to_dict() for word in word_patterns]
    for word in words:
        bag.append(1) if word in word_patterns else bag.append(0)
    output_row = list(output_empty)
    output_row[classes.index(document[1])] = 1
    training.append([bag, output_row])

random.shuffle(training)
training = np.array(training, dtype="object")

train_x = list(training[:, 0])
train_y = list(training[:, 1])

model = Sequential()
model.add(Dense(128, input_shape=(len(train_x[0]),), activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(len(train_y[0]), activation='softmax'))

sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

hist = model.fit(np.array(train_x), np.array(train_y), epochs=200, batch_size=5, verbose=1)
model.save('chatbotmodel.h5', hist)
print('Listo')
The problem is that when I try to have the model predict a class with the following code:
import random
import json
import pickle
import numpy as np
import nltk
from nltk.tokenize.toktok import ToktokTokenizer
import stanza
from tensorflow.keras.models import load_model

toktok = ToktokTokenizer()
intents = json.loads(open('intents.json').read())
words = pickle.load(open('palabras.pkl', 'rb'))
classes = pickle.load(open('classes.pkl', 'rb'))
model = load_model('chatbotmodel.h5')
nlp = stanza.Pipeline('es')

def clean_up_sentence(sentence):
    sentence_words = toktok.tokenize(sentence)
    words = [nlp(word).to_dict() for word in sentence_words]
    palabras = []
    for i in range(0, len(words)):
        texto = words[i][0][0]["lemma"]
        palabras.append(texto)
    palabras = sorted(set(palabras))
    return palabras

def bag_of_words(sentence):
    sentences_words = clean_up_sentence(sentence)
    bag = [0] * len(words)
    for w in sentences_words:
        for i, word in enumerate(words):
            if word == w:
                bag[i] = 1
    return np.array(bag)

def predict_class(sentence):
    bow = bag_of_words(sentence)
    res = model.predict(np.array([bow]))[0]
    return res

predict_class('Hola, ¿cómo te va?')
I get the following error:
ValueError: Input 0 of layer sequential_1 is incompatible with the layer: expected axis -1 of input shape to have value 15 but received input with shape (None, 13)
Any ideas? I followed the tutorial essentially step by step. The intents.json data used for training is very simple:
{"intents": [ {"tag": "respuestaSaludo", "patterns": ["bien","¿quien es?","¿qué desea?"], "responses": ["Mi nombre es Juan Carlos Bellido", "Soy Juan"] }, {"tag": "respuestaPropuesta", "patterns": ["no","no él gracias","no por ahora, aquí","de nuevo"], "responses": ["ok, gracias","ok"] }]}
Edit 1: model.summary():
Model: "sequential_1"_________________________________________________________________Layer (type) Output Shape Param # =================================================================dense_1 (Dense) (None, 128) 2048 _________________________________________________________________dropout (Dropout) (None, 128) 0 _________________________________________________________________dense_2 (Dense) (None, 64) 8256 _________________________________________________________________dropout_1 (Dropout) (None, 64) 0 _________________________________________________________________dense_3 (Dense) (None, 2) 130 =================================================================Total params: 10,434Trainable params: 10,434Non-trainable params: 0_________________________________________________________________
bow.shape:
def predict_class(sentence):
    bow = bag_of_words(sentence)
    return print(bow.shape)

predict_class('quiero ver que sucede')
Output:
(13,)
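A quick way to see where the 13-versus-15 mismatch comes from is to compare the size of the pickled vocabulary with the input dimension the saved model actually expects. A minimal sketch, assuming `palabras.pkl` and `chatbotmodel.h5` are the files produced by the training script above:

import pickle
from tensorflow.keras.models import load_model

# Vocabulary the prediction script uses to build the bag-of-words vector.
palabras = pickle.load(open('palabras.pkl', 'rb'))

# Saved model; its first Dense layer fixes the expected input dimension.
model = load_model('chatbotmodel.h5')

print('len(palabras):', len(palabras))          # length of the bag-of-words vector at prediction time
print('model input shape:', model.input_shape)  # (None, N) -> N features expected by the model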
Answer:

As far as I can tell, your model expects an input of length 15, but you are feeding it 13 values.

Try appending two zeros to the end of your vector:
bow = bag_of_words(sentence)
bow = np.concatenate((bow, np.array([0, 0])), -1)
res = model.predict(np.array([bow]))[0]
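If you prefer not to hard-code the two extra zeros, a small variation (a sketch, assuming the prediction-time vector is always shorter than the trained input size, with `model` and `bag_of_words` defined as in your prediction script) pads the vector up to whatever input size the loaded model reports:

import numpy as np

def predict_class(sentence):
    bow = bag_of_words(sentence)
    expected = model.input_shape[-1]  # input dimension the saved model was trained with
    if len(bow) < expected:
        # Pad with zeros on the right so the vector matches the model's input size.
        bow = np.pad(bow, (0, expected - len(bow)))
    res = model.predict(np.array([bow]))[0]
    return res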