我在使用US-CERT数据集进行内部威胁检测,工作环境是TensorFlow。
以下是代码:
# Insider-threat classifier: loads the transformed scenario-2 training CSV,
# integer-encodes the categorical columns, and trains a small dense network
# on the result using a tf.data input pipeline.
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import feature_column
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from pandas.api.types import CategoricalDtype

#Use Pandas to create a dataframe
#In windows to get file from path other than same run directory see:
#https://stackoverflow.com/questions/16952632/read-a-csv-into-pandas-from-f-drive-on-windows-7
URL = 'https://raw.githubusercontent.com/dc401/tensorflow-insiderthreat/master/scenario2-training-dataset-transformed-tf.csv'
dataframe = pd.read_csv(URL)
#print(dataframe.head())

#show dataframe details for column types
#print(dataframe.info())
#print(pd.unique(dataframe['user']))

#https://pbpython.com/categorical-encoding.html
# Convert the text columns to pandas categoricals, then expose their integer
# codes as new "<name>_cat" columns.
dataframe["user"] = dataframe["user"].astype('category')
dataframe["source"] = dataframe["source"].astype('category')
dataframe["action"] = dataframe["action"].astype('category')
dataframe["user_cat"] = dataframe["user"].cat.codes
dataframe["source_cat"] = dataframe["source"].cat.codes
dataframe["action_cat"] = dataframe["action"].cat.codes
#print(dataframe.info())
#print(dataframe.head())

#save dataframe with new columns for future data mapping
dataframe.to_csv('dataframe-export-allcolumns.csv')

#remove old columns
del dataframe["user"]
del dataframe["source"]
del dataframe["action"]

#restore original names of columns
dataframe.rename(columns={"user_cat": "user", "source_cat": "source", "action_cat": "action"}, inplace=True)
print(dataframe.head())
print(dataframe.info())

#save dataframe cleaned up
dataframe.to_csv('dataframe-export-int-cleaned.csv')

#Split the dataframe into train, validation, and test
train, test = train_test_split(dataframe, test_size=0.2)
train, val = train_test_split(train, test_size=0.2)
print(len(train), 'train examples')
print(len(val), 'validation examples')
print(len(test), 'test examples')


#Create an input pipeline using tf.data
def df_to_dataset(dataframe, shuffle=True, batch_size=32):
    """Build a batched tf.data.Dataset of (features, label) from a DataFrame.

    Args:
        dataframe: DataFrame containing the feature columns and an
            'insiderthreat' label column. It is copied, so the caller's
            frame is left unmodified.
        shuffle: When true, shuffle with a buffer as large as the frame.
        batch_size: Number of rows per batch.

    Returns:
        A dataset yielding (dict of column name -> values, labels) batches.
    """
    dataframe = dataframe.copy()
    labels = dataframe.pop('insiderthreat')
    ds = tf.data.Dataset.from_tensor_slices((dict(dataframe), labels))
    if shuffle:
        ds = ds.shuffle(buffer_size=len(dataframe))
    ds = ds.batch(batch_size)
    return ds


#choose columns needed for calculations (features)
feature_columns = []
for header in ["vector", "date", "user", "source", "action"]:
    feature_columns.append(feature_column.numeric_column(header))

#create feature layer
feature_layer = tf.keras.layers.DenseFeatures(feature_columns)

#set batch size pipeline
batch_size = 32
train_ds = df_to_dataset(train, batch_size=batch_size)
val_ds = df_to_dataset(val, shuffle=False, batch_size=batch_size)
test_ds = df_to_dataset(test, shuffle=False, batch_size=batch_size)

#create compile and train model
model = tf.keras.Sequential([
    feature_layer,
    layers.Dense(128, activation='relu'),
    layers.Dense(128, activation='relu'),
    layers.Dense(1),
])

# The final Dense(1) emits raw logits (the loss uses from_logits=True), so the
# accuracy metric must threshold at 0.0. The plain 'accuracy' string would
# apply a 0.5 threshold to the logits and misreport accuracy.
model.compile(optimizer='adam',
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=[tf.keras.metrics.BinaryAccuracy(name='accuracy', threshold=0.0)])

model.fit(train_ds, validation_data=val_ds, epochs=5)

loss, accuracy = model.evaluate(test_ds)
print("Accuracy", accuracy)
准确率只有50%到60%左右,因此我想把这个神经网络改成卷积神经网络来提高准确率。我认为关键是在这里添加更多的层,例如:
#create compile and train model
# Fully-connected baseline: the feature layer feeds two 128-unit ReLU layers
# and a single-unit output layer.
model = tf.keras.Sequential([
    feature_layer,
    layers.Dense(128, activation='relu'),
    layers.Dense(128, activation='relu'),
    layers.Dense(1),
])
问题是如何将这个模型转换为卷积神经网络?
回答:
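你只需用卷积层和池化层替换全连接层。需要注意的是,Conv1D 要求三维输入(batch, steps, channels),而特征层的输出是二维的(batch, features),因此在卷积层之前要先用 Reshape 增加一个通道维度: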
#create compile and train model
# Conv1D needs 3-D input (batch, steps, channels), but the feature layer emits
# 2-D (batch, features). The Reshape treats each feature as one step with a
# single channel. len(feature_columns) equals the feature width here because
# every column is a scalar numeric_column.
# The filter count, kernel size and pool size are starting values to tune.
model = tf.keras.Sequential([
    feature_layer,
    layers.Reshape((len(feature_columns), 1)),
    layers.Conv1D(filters=32, kernel_size=3, padding='same', activation='relu'),
    layers.MaxPooling1D(pool_size=2),
    layers.Flatten(),
    layers.Dense(1),
])
你可以使用TensorFlow关于卷积神经网络的文档来拼凑你应该使用的参数,并进一步定制你的应用。