Here is my code:
    def create_dataset(signal_data, look_back=1):
        dataX, dataY = [], []
        for i in range(len(signal_data) - look_back):
            dataX.append(signal_data[i:(i + look_back), 0])
            dataY.append(signal_data[i + look_back, 0])
        return np.array(dataX), np.array(dataY)

    df = pd.read_csv('time_series.csv')
    signal_data = df.Close.values.astype('float32')
    signal_data = signal_data.reshape(len(df), 1)

    scaler = MinMaxScaler(feature_range=(0, 1))
    signal_data = scaler.fit_transform(signal_data)

    train_size = int(len(signal_data) * 0.80)
    test_size = len(signal_data) - train_size
    # val_size = len(signal_data) - train_size - test_size
    train = signal_data[0:train_size]
    # val = signal_data[train_size:train_size+val_size]
    test = signal_data[train_size+val_size:len(signal_data)]

    x_train, y_train = create_dataset(train, look_back)
    # x_val, y_val = create_dataset(val, look_back)
    x_test, y_test = create_dataset(test, look_back)

    x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
    # x_val = np.reshape(x_val, (x_val.shape[0], x_val.shape[1], 1))
    x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))
Now I want to add df.Open, df.High, df.Low, and df.Volume. How should I change this code?
Should I just add them to signal_data directly? I'd like to know how to add the data so that I can train on multiple features in signal_data.
I don't know where or how to implement this, so I need your help.
Any advice or ideas would be greatly appreciated.
Answer:
I made a few changes to your code; it should work now. In summary:
- I fixed the line where you were selecting only column 0. The target variable now sits in the last position and the other variables come before it (a minimal sketch of this change follows this list).
- I removed the reshapes that are no longer needed and adjusted the remaining ones so that all dimensions are kept.
- I fixed the model's input shape: you now have 5 features instead of 1.
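To make the windowing change concrete, here is a minimal sketch with a toy array used only for a shape check (the full code is at the end of this answer):

    import numpy as np

    def create_dataset(signal_data, look_back=1):
        dataX, dataY = [], []
        for i in range(len(signal_data) - look_back):
            dataX.append(signal_data[i:(i + look_back), :])   # keep every feature column in the window
            dataY.append(signal_data[i + look_back, -1])      # predict the last column (Close)
        return np.array(dataX), np.array(dataY)

    toy = np.random.rand(100, 5).astype('float32')   # toy data: 100 timesteps, 5 features (illustration only)
    x, y = create_dataset(toy, look_back=20)
    print(x.shape)   # (80, 20, 5) -> already (samples, timesteps, features), no extra reshape needed
    print(y.shape)   # (80,)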
My general recommendations:
- I would not use MinMaxScaler, because a single outlier can distort the whole distribution; I would use StandardScaler instead. See http://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html
- I would scale the data after building train_x, test_x and their respective y arrays (see the sketch after this list). The reason is that, as written, the scaler uses statistics from both the training and the test set, i.e. information from the future. That is not what you will face when you run the model for real, where you have to scale new data with statistics computed from the past. It is best to build a test set that is as close to reality as possible.
- How do you know whether your model is big enough to model your data? I would remove the dropouts and check whether the model can overfit the training data. If it can, the model is large enough and you can start regularizing it to improve generalization. More on this in this book: https://www.deeplearning.ai/machine-learning-yearning/
- In metrics you chose accuracy, which is a classification metric. I would pick a metric that matches the problem type (regression), for example mean absolute error (also shown in the sketch after this list).
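A minimal sketch of the scaling and metric recommendations. The names train_raw, val_raw and test_raw are placeholders for the unscaled splits from the code below; the point is only that the scaler is fitted on training data and reused, and that compile gets a regression metric:

    from sklearn.preprocessing import StandardScaler

    # Split first, then fit the scaler on the training portion only, so that
    # validation/test are scaled with statistics from the "past" (no look-ahead leakage).
    scaler = StandardScaler()
    train = scaler.fit_transform(train_raw)   # train_raw: the unscaled training slice, shape (n_train, 5)
    val = scaler.transform(val_raw)
    test = scaler.transform(test_raw)

    x_train, y_train = create_dataset(train, look_back)
    x_val, y_val = create_dataset(val, look_back)
    x_test, y_test = create_dataset(test, look_back)

    # Regression metric instead of accuracy:
    model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mae'])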
Hope this helps.
    import numpy as np
    import pandas as pd
    from keras.models import Sequential
    from keras.layers import Dense, LSTM, Dropout
    from sklearn.preprocessing import MinMaxScaler
    from sklearn.metrics import mean_squared_error
    import tensorflow as tf
    import matplotlib.pyplot as plt

    # Let TensorFlow allocate GPU memory on demand (TF 1.x API)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    def create_dataset(signal_data, look_back=1):
        dataX, dataY = [], []
        for i in range(len(signal_data) - look_back):
            dataX.append(signal_data[i:(i + look_back), :])   # all feature columns go into the window
            dataY.append(signal_data[i + look_back, -1])      # target: last column (Close)
        return np.array(dataX), np.array(dataY)

    look_back = 20

    df = pd.read_csv('kospi.csv')
    signal_data = df[["Open", "Low", "High", "Volume", "Close"]].values.astype('float32')

    scaler = MinMaxScaler(feature_range=(0, 1))
    signal_data = scaler.fit_transform(signal_data)

    # 80% train, 5% validation, the rest test
    train_size = int(len(signal_data) * 0.80)
    test_size = len(signal_data) - train_size - int(len(signal_data) * 0.05)
    val_size = len(signal_data) - train_size - test_size

    train = signal_data[0:train_size]
    val = signal_data[train_size:train_size + val_size]
    test = signal_data[train_size + val_size:len(signal_data)]

    x_train, y_train = create_dataset(train, look_back)
    x_val, y_val = create_dataset(val, look_back)
    x_test, y_test = create_dataset(test, look_back)

    model = Sequential()
    model.add(LSTM(128, input_shape=(None, 5), return_sequences=True))
    model.add(Dropout(0.3))
    model.add(LSTM(128, input_shape=(None, 5)))
    model.add(Dropout(0.3))
    model.add(Dense(128))
    model.add(Dropout(0.3))
    model.add(Dense(1))
    # 'accuracy' kept from the original; see the note above about using 'mae' for regression
    model.compile(loss='mean_squared_error', optimizer='adam', metrics=['accuracy'])
    model.summary()

    hist = model.fit(x_train, y_train, epochs=20, batch_size=32, verbose=2,
                     validation_data=(x_val, y_val))

    trainScore = model.evaluate(x_train, y_train, verbose=0)
    model.reset_states()
    print('Train Score: ', trainScore)
    valScore = model.evaluate(x_val, y_val, verbose=0)
    model.reset_states()
    print('Validation Score: ', valScore)
    testScore = model.evaluate(x_test, y_test, verbose=0)
    model.reset_states()
    print('Test Score: ', testScore)

    p = model.predict(x_test)
    print(mean_squared_error(y_test, p))

    plt.plot(y_test)
    plt.plot(p)
    plt.legend(['testY', 'p'], loc='upper right')
    plt.show()
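One optional addition, in case you want the final plot in the original price scale rather than the scaled one: since the scaler was fitted on all five columns, you have to pad the predictions before calling inverse_transform and keep only the last (Close) column. A small sketch under that assumption:

    # p has shape (n, 1) and y_test has shape (n,); pad to 5 columns so inverse_transform works,
    # then read back only the Close column (the last one).
    pad_p = np.zeros((len(p), 5))
    pad_p[:, -1] = p[:, 0]
    pad_y = np.zeros((len(y_test), 5))
    pad_y[:, -1] = y_test
    p_price = scaler.inverse_transform(pad_p)[:, -1]
    y_price = scaler.inverse_transform(pad_y)[:, -1]

    plt.plot(y_price)
    plt.plot(p_price)
    plt.legend(['testY', 'p'], loc='upper right')
    plt.show()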