当我只使用 `mtry` 参数作为 `tuneGrid` 时,它可以正常工作;但当我添加 `ntree` 参数后,出现了如下错误:`Error in train.default(x, y, weights = w, ...): The tuning parameter grid should have columns mtry`。以下是代码:
# Reproduction script: tune a random forest with caret on the credit-scoring
# data, using a grid that contains both mtry and ntree.
library(RCurl)
library(prettyR)
library(caret)

# Download the cleaned credit-scoring data set and parse it as CSV.
url <- "https://raw.githubusercontent.com/gastonstat/CreditScoring/master/CleanCreditScoring.csv"
cs_data <- getURL(url)
cs_data <- read.csv(textConnection(cs_data))

# Outcome column, and the predictors left after dropping the listed columns.
classes <- cs_data[, "Status"]
predictors <- cs_data[, -match(
  c(
    "Status", "Seniority", "Time", "Age", "Expenses", "Income",
    "Assets", "Debt", "Amount", "Price", "Finrat", "Savings"
  ),
  colnames(cs_data)
)]

# Seed the RNG before the random partition so the split is reproducible.
set.seed(123)
train_set <- createDataPartition(classes, p = 0.8, list = FALSE)
cs_data_train <- cs_data[train_set, ]
cs_data_test <- cs_data[-train_set, ]

# Define the parameters for tuning.
# NOTE: this grid carries an ntree column in addition to mtry; this is the
# grid that triggers "The tuning parameter grid should have columns mtry"
# when used with method = "rf".
grid <- expand.grid(mtry = seq(4, 16, 4), ntree = c(700, 1000, 2000))

# 10-fold cross-validation with class probabilities, summarised by
# twoClassSummary.
ctrl <- trainControl(
  method = "cv",
  number = 10,
  summaryFunction = twoClassSummary,
  classProbs = TRUE
)

rf_fit <- train(
  Status ~ .,
  data = cs_data_train,
  method = "rf",
  preProcess = c("center", "scale"),
  tuneGrid = grid,
  trControl = ctrl,
  family = "binomial",
  metric = "ROC" # define which metric to optimize, e.g. metric = 'RMSE'
)
rf_fit
回答:
caret 内置的 `rf` 方法只把 `mtry` 作为可调参数,因此网格中出现 `ntree` 列就会报错。你需要基于 randomForest 包创建一个自定义的 RF 模型,并在其中声明你想要调优的参数。
# Custom caret model specification wrapping randomForest so that both
# mtry and ntree are declared as tuning parameters.
customRF <- list(
  type = "Classification",
  library = "randomForest",
  loop = NULL
)

# Tuning parameters exposed by this model: mtry and ntree, both numeric.
customRF$parameters <- data.frame(
  parameter = c("mtry", "ntree"),
  class = rep("numeric", 2),
  label = c("mtry", "ntree")
)

# Intentionally empty: returns NULL, i.e. no default grid is generated here.
# NOTE(review): presumably the caller must pass tuneGrid to train() explicitly.
customRF$grid <- function(x, y, len = NULL, search = "grid") {}

# Fit one forest for a single row of the tuning grid (`param`).
# The call is namespace-qualified so it does not depend on the randomForest
# package being attached to the search path.
customRF$fit <- function(x, y, wts, param, lev, last, weights, classProbs, ...) {
  randomForest::randomForest(x, y, mtry = param$mtry, ntree = param$ntree, ...)
}

# Class predictions for new data.
customRF$predict <- function(modelFit, newdata, preProc = NULL, submodels = NULL) {
  predict(modelFit, newdata)
}

# Class probabilities for new data.
customRF$prob <- function(modelFit, newdata, preProc = NULL, submodels = NULL) {
  predict(modelFit, newdata, type = "prob")
}

# Order the tuning grid by its first column.
customRF$sort <- function(x) x[order(x[, 1]), ]

# Return the `classes` element of the fitted model.
customRF$levels <- function(x) x$classes

customRF
然后你可以在 `train` 函数中通过 `method = customRF` 使用它,并把同时包含 `mtry` 和 `ntree` 两列的网格传给 `tuneGrid`。