我在Python 3.6中使用了包含隐马尔可夫模型(HMM)的语音识别代码。训练数据(输入文件夹)由.wav
文件组成,组织结构如下
train pineapple apple banana orange kiwi peach lime
test
数据文件夹使用了类似的模式。
代码在命令提示符下运行:
python Speech-Recognizer.py --input-folder train
代码如下所示:
import osimport argparseimport numpy as npfrom scipy.io import wavfilefrom hmmlearn import hmmfrom python_speech_features import mfcc# Function to parse input argumentsdef build_arg_parser(): parser = argparse.ArgumentParser(description='Trains the HMM classifier') parser.add_argument("--input-folder", dest="input_folder", required=True, help="Input folder containing the audio files in subfolders") return parser# Class to handle all HMM related processingclass HMMTrainer(object): def __init__(self, model_name='GaussianHMM', n_components=4, cov_type='diag', n_iter=1000): self.model_name = model_name self.n_components = n_components self.cov_type = cov_type self.n_iter = n_iter self.models = [] if self.model_name == 'GaussianHMM': self.model = hmm.GaussianHMM(n_components=self.n_components, covariance_type=self.cov_type, n_iter=self.n_iter) else: raise TypeError('Invalid model type') # X is a 2D numpy array where each row is 13D def train(self, X): np.seterr(all='ignore') self.models.append(self.model.fit(X)) # Run the model on input data def get_score(self, input_data): return self.model.score(input_data)if __name__ == '__main__': args = build_arg_parser().parse_args() input_folder = args.input_folder hmm_models = [] # Parse the input directory for dirname in os.listdir(input_folder): # Get the name of the subfolder subfolder = os.path.join(input_folder, dirname) if not os.path.isdir(subfolder): continue # Extract the label label = subfolder[subfolder.rfind('/') + 1:] # Initialize variables X = np.array([]) y_words = [] # Iterate through the audio files (leaving 1 file for testing in each class) for filename in [x for x in os.listdir(subfolder) if x.endswith('.wav')][:-1]: # Read the input file filepath = os.path.join(subfolder, filename) sampling_freq, audio = wavfile.read(filepath) # Extract MFCC features mfcc_features = mfcc(audio, sampling_freq) # Append to the variable X if len(X) == 0: X = mfcc_features else: X = np.append(X, mfcc_features, axis=0) # Append the label y_words.append(label) print('X.shape =', X.shape) # Train and save HMM model hmm_trainer = HMMTrainer() hmm_trainer.train(X) hmm_models.append((hmm_trainer, label)) hmm_trainer = None # Test files input_files = [ 'test/pineapple/pineapple15.wav', 'test/orange/orange15.wav', 'test/apple/apple15.wav', 'test/kiwi/kiwi15.wav' ] # Classify input data for input_file in input_files: # Read input file sampling_freq, audio = wavfile.read(input_file) # Extract MFCC features mfcc_features = mfcc(audio, sampling_freq) # Define variables max_score = None output_label = None # Iterate through all HMM models and pick # the one with the highest score for item in hmm_models: hmm_model, label = item score = hmm_model.get_score(mfcc_features) if score > max_score: max_score = score output_label = label # Print the output print("\nTrue:", input_file[input_file.find('/') + 1:input_file.rfind('/')]) print("Predicted:", output_label)
运行上述代码时,我得到了以下错误:
Traceback (most recent call last): File "Speech-Recognizer.py", line 113, in <module> if score > max_score:TypeError: '>' not supported between instances of 'float' and 'NoneType'
回答:
max_score = None... if score > max_score:
你正在尝试将一个浮点数与None进行比较。
你可以将max_score = None改为max_score = 0,试试看怎么样?