我使用的训练数据来自一个 RData 文件,另外我自己构建了一个数据框,我认为它包含了训练数据中的所有列。
# Load a saved model from the path given as the first command-line argument
# and run predict() on a table of new data.
# NOTE(review): fread() and setDF() are data.table functions; this snippet
# does not load the package itself, so it must already be attached.
args <- commandArgs(trailingOnly = TRUE)
model <- readRDS(args[1])
m <- model[[1]]  # the first element of the saved object is used as the model

# NOTE(review): newDataPath is not defined anywhere in this snippet --
# presumably it should come from the command line (e.g. args[2]); confirm.
infile <- fread(newDataPath, header = TRUE)
setDF(infile)  # convert the data.table to a plain data.frame in place

# Exclude the chr/pos/end columns before prediction; drop = FALSE keeps the
# result a data.frame even if only a single column remains.
i <- infile[, !colnames(infile) %in% c("chr", "pos", "end"), drop = FALSE]
predictions <- predict(m, i)
然而,运行这段代码时,我得到了 `variables in the training data missing in newdata`(训练数据中的变量在 newdata 中缺失)的错误提示。
通过 `colnames(i)`,我可以找到 `newdata` 中的变量列表,但如何对训练数据做同样的事情呢?我认为训练数据是一个 `randomForest` 类的对象?
回答:
你可以使用 `str` 来查看模型的结构,以找到列名的位置。
我假设你使用的是 `randomForest` 包,但对于其他模型,思路也是一样的。
# Example: fit a small random forest on iris, inspect the fitted object with
# str(), then pull the predictor names and classes out of the stored terms.
# Lines starting with "#>" are the printed output of the preceding call.
library("randomForest")

model <- randomForest(Species ~ ., data = iris, ntree = 5)

# Show the full structure of the fitted object to locate the column names.
str(model)
#> List of 19
#> $ call : language randomForest(formula = Species ~ ., data = iris, ntree = 5)
#> $ type : chr "classification"
#> $ predicted : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
#> ..- attr(*, "names")= chr [1:150] "1" "2" "3" "4" ...
#> $ err.rate : num [1:5, 1:4] 0.0862 0.0753 0.114 0.0714 0.0833 ...
#> ..- attr(*, "dimnames")=List of 2
#> .. ..$ : NULL
#> .. ..$ : chr [1:4] "OOB" "setosa" "versicolor" "virginica"
#> $ confusion : num [1:3, 1:4] 45 0 0 0 41 8 0 3 35 0 ...
#> ..- attr(*, "dimnames")=List of 2
#> .. ..$ : chr [1:3] "setosa" "versicolor" "virginica"
#> .. ..$ : chr [1:4] "setosa" "versicolor" "virginica" "class.error"
#> $ votes : matrix [1:150, 1:3] 1 1 1 1 1 1 1 1 1 1 ...
#> ..- attr(*, "dimnames")=List of 2
#> .. ..$ : chr [1:150] "1" "2" "3" "4" ...
#> .. ..$ : chr [1:3] "setosa" "versicolor" "virginica"
#> $ oob.times : num [1:150] 1 2 1 1 3 1 2 2 2 2 ...
#> $ classes : chr [1:3] "setosa" "versicolor" "virginica"
#> $ importance : num [1:4, 1] 20.53 4.33 19.17 55.25
#> ..- attr(*, "dimnames")=List of 2
#> .. ..$ : chr [1:4] "Sepal.Length" "Sepal.Width" "Petal.Length" "Petal.Width"
#> .. ..$ : chr "MeanDecreaseGini"
#> $ importanceSD : NULL
#> $ localImportance: NULL
#> $ proximity : NULL
#> $ ntree : num 5
#> $ mtry : num 2
#> $ forest :List of 14
#> ..$ ndbigtree : int [1:5] 9 17 35 11 19
#> ..$ nodestatus: int [1:35, 1:5] 1 1 -1 -1 1 1 -1 -1 -1 0 ...
#> ..$ bestvar : int [1:35, 1:5] 4 4 0 0 2 3 0 0 0 0 ...
#> ..$ treemap : int [1:35, 1:2, 1:5] 2 4 0 0 6 8 0 0 0 0 ...
#> ..$ nodepred : int [1:35, 1:5] 0 0 3 1 0 0 2 2 3 0 ...
#> ..$ xbestsplit: num [1:35, 1:5] 1.65 0.8 0 0 2.25 4.75 0 0 0 0 ...
#> ..$ pid : num [1:3] 1 1 1
#> ..$ cutoff : num [1:3] 0.333 0.333 0.333
#> ..$ ncat : Named int [1:4] 1 1 1 1
#> .. ..- attr(*, "names")= chr [1:4] "Sepal.Length" "Sepal.Width" "Petal.Length" "Petal.Width"
#> ..$ maxcat : int 1
#> ..$ nrnodes : int 35
#> ..$ ntree : num 5
#> ..$ nclass : int 3
#> ..$ xlevels :List of 4
#> .. ..$ Sepal.Length: num 0
#> .. ..$ Sepal.Width : num 0
#> .. ..$ Petal.Length: num 0
#> .. ..$ Petal.Width : num 0
#> $ y : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
#> ..- attr(*, "names")= chr [1:150] "1" "2" "3" "4" ...
#> $ test : NULL
#> $ inbag : NULL
#> $ terms :Classes 'terms', 'formula' language Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width
#> .. ..- attr(*, "variables")= language list(Species, Sepal.Length, Sepal.Width, Petal.Length, Petal.Width)
#> .. ..- attr(*, "factors")= int [1:5, 1:4] 0 1 0 0 0 0 0 1 0 0 ...
#> .. .. ..- attr(*, "dimnames")=List of 2
#> .. .. .. ..$ : chr [1:5] "Species" "Sepal.Length" "Sepal.Width" "Petal.Length" ...
#> .. .. .. ..$ : chr [1:4] "Sepal.Length" "Sepal.Width" "Petal.Length" "Petal.Width"
#> .. ..- attr(*, "term.labels")= chr [1:4] "Sepal.Length" "Sepal.Width" "Petal.Length" "Petal.Width"
#> .. ..- attr(*, "order")= int [1:4] 1 1 1 1
#> .. ..- attr(*, "intercept")= num 0
#> .. ..- attr(*, "response")= int 1
#> .. ..- attr(*, ".Environment")=<environment: 0x7f9bed91f8d8>
#> .. ..- attr(*, "predvars")= language list(Species, Sepal.Length, Sepal.Width, Petal.Length, Petal.Width)
#> .. ..- attr(*, "dataClasses")= Named chr [1:5] "factor" "numeric" "numeric" "numeric" ...
#> .. .. ..- attr(*, "names")= chr [1:5] "Species" "Sepal.Length" "Sepal.Width" "Petal.Length" ...
#> - attr(*, "class")= chr [1:2] "randomForest.formula" "randomForest"

# Predictor names used when the model was fitted.
attr(model$terms, "term.labels")
#> [1] "Sepal.Length" "Sepal.Width" "Petal.Length" "Petal.Width"

# Class of each variable in the model formula (response included).
attr(model$terms, "dataClasses")
#> Species Sepal.Length Sepal.Width Petal.Length Petal.Width
#> "factor" "numeric" "numeric" "numeric" "numeric"